Skip to content

Commit c91b372

Browse files
committed
[df] Avoid unnecessary duplication of JIT lines
When accumulating code to JIT for Define, Filter and Vary nodes, the current logic embeds the name of e.g. the new column to define as part of the function body that will go into the accumulated code to JIT. This in turn means that every time we call a Define, there will be a corresponding code to JIT, as well as a corresponding function body that will go into the newly created cache for the JIT node registrators. This can be optimised by avoiding to store the names as part of the function body, thus allowing a much more aggressive caching. In a simple example such as: ``` ROOT::RDataFrame root(1); std::vector<ROOT::RDF::RResultPtr<double>> dfs; const auto n = 10000; for (auto i = 0u; i < n; i++) { const auto column = "x" + std::to_string(i); auto define = root.Define(column, "42.f"); auto filter = define.Filter(column + " > 0.f"); auto sum = filter.Sum(column); dfs.emplace_back(sum); } dfs[n - 1].GetValue(); ``` This commit changes the previous situation that would have had 10K JIT function bodies, i.e. one per Define with a different column name, down to one function body to JIT for the lambda returning 42.f.
1 parent 022b743 commit c91b372

File tree

4 files changed

+28
-20
lines changed

4 files changed

+28
-20
lines changed

tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ void AddDSColumns(const std::vector<std::string> &requiredCols, ROOT::Detail::RD
469469

470470
// this function is meant to be called by the jitted code generated by BookFilterJit
471471
template <typename F>
472-
void JitFilterHelper(F &&f, const ColumnNames_t &cols, std::string_view name, RColumnRegister &colRegister,
472+
void JitFilterHelper(F &&f, const ColumnNames_t &cols, RColumnRegister &colRegister,
473473
ROOT::Detail::RDF::RLoopManager &lm, ROOT::Detail::RDF::RJittedFilter *jittedFilter) noexcept
474474
{
475475
if (!jittedFilter) {
@@ -493,7 +493,7 @@ void JitFilterHelper(F &&f, const ColumnNames_t &cols, std::string_view name, RC
493493
AddDSColumns(cols, lm, *ds, ColTypes_t(), colRegister);
494494

495495
jittedFilter->SetFilter(
496-
std::unique_ptr<RFilterBase>(new F_t(std::forward<F>(f), cols, prevNode, colRegister, name)));
496+
std::unique_ptr<RFilterBase>(new F_t(std::forward<F>(f), cols, prevNode, colRegister, jittedFilter->GetName())));
497497
}
498498

499499
namespace DefineTypes {
@@ -521,7 +521,7 @@ auto MakeDefineNode(DefineTypes::RDefinePerSampleTag, std::string_view name, std
521521
// This function is meant to be called by jitted code right before starting the event loop.
522522
// If colsPtr is null, build a RDefinePerSample (it has no input columns), otherwise a RDefine.
523523
template <typename RDefineTypeTag, typename F>
524-
void JitDefineHelper(F &&f, const ColumnNames_t &cols, std::string_view name, RColumnRegister &colRegister,
524+
void JitDefineHelper(F &&f, const ColumnNames_t &cols, RColumnRegister &colRegister,
525525
ROOT::Detail::RDF::RLoopManager &lm, ROOT::Detail::RDF::RJittedDefine *jittedDefine) noexcept
526526
{
527527

@@ -543,15 +543,14 @@ void JitDefineHelper(F &&f, const ColumnNames_t &cols, std::string_view name, RC
543543
const auto dummyType = "jittedCol_t";
544544
// use unique_ptr<RDefineBase> instead of make_unique<NewCol_t> to reduce jit/compile-times
545545
std::unique_ptr<RDefineBase> newCol{
546-
MakeDefineNode(RDefineTypeTag{}, name, dummyType, std::forward<F>(f), cols, colRegister, lm)};
546+
MakeDefineNode(RDefineTypeTag{}, jittedDefine->GetName(), dummyType, std::forward<F>(f), cols, colRegister, lm)};
547547
jittedDefine->SetDefine(std::move(newCol));
548548
}
549549

550550
template <bool IsSingleColumn, typename F>
551-
void JitVariationHelper(F &&f, const ColumnNames_t &inputColNames, std::string_view variationName,
552-
RColumnRegister &colRegister, ROOT::Detail::RDF::RLoopManager &lm,
553-
RJittedVariation *jittedVariation, const ColumnNames_t &variedColNames,
554-
const ColumnNames_t &variationTags) noexcept
551+
void JitVariationHelper(F &&f, const ColumnNames_t &inputColNames, RColumnRegister &colRegister,
552+
ROOT::Detail::RDF::RLoopManager &lm, RJittedVariation *jittedVariation,
553+
const ColumnNames_t &variedColNames, const ColumnNames_t &variationTags) noexcept
555554
{
556555

557556
if (!jittedVariation) {
@@ -568,9 +567,9 @@ void JitVariationHelper(F &&f, const ColumnNames_t &inputColNames, std::string_v
568567
AddDSColumns(inputColNames, lm, *ds, ColTypes_t(), colRegister);
569568

570569
// use unique_ptr<RDefineBase> instead of make_unique<NewCol_t> to reduce jit/compile-times
571-
std::unique_ptr<RVariationBase> newVariation{
572-
new RVariation<std::decay_t<F>, IsSingleColumn>(variedColNames, variationName, std::forward<F>(f), variationTags,
573-
jittedVariation->GetTypeName(), colRegister, lm, inputColNames)};
570+
std::unique_ptr<RVariationBase> newVariation{new RVariation<std::decay_t<F>, IsSingleColumn>(
571+
variedColNames, jittedVariation->GetVariationName(), std::forward<F>(f), variationTags,
572+
jittedVariation->GetTypeName(), colRegister, lm, inputColNames)};
574573
jittedVariation->SetVariation(std::move(newVariation));
575574
}
576575

tree/dataframe/inc/ROOT/RDF/RVariationBase.hxx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ namespace RDF {
3939
class RVariationBase {
4040
protected:
4141
std::vector<std::string> fColNames; ///< The names of the varied columns.
42-
std::vector<std::string> fVariationNames; ///< The names of the systematic variation.
42+
std::vector<std::string> fVariationNames; ///< The tags of the systematic variation.
43+
std::string fVariationName; ///< The name of the systematic variation.
4344
std::string fType; ///< The type of the custom column as a text string.
4445
std::vector<Long64_t> fLastCheckedEntry;
4546
RColumnRegister fColumnRegister;
@@ -66,6 +67,7 @@ public:
6667
virtual const std::type_info &GetTypeId() const = 0;
6768
const std::vector<std::string> &GetColumnNames() const;
6869
const std::vector<std::string> &GetVariationNames() const;
70+
const std::string &GetVariationName() const;
6971
std::string GetTypeName() const;
7072
/// Update the value at the address returned by GetValuePtr with the content corresponding to the given entry
7173
virtual void Update(unsigned int slot, Long64_t entry) = 0;

tree/dataframe/src/RDFInterfaceUtils.cxx

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -643,7 +643,6 @@ BookFilterJit(std::shared_ptr<RDFDetail::RNodeBase> prevNode, std::string_view n
643643
<< "std::shared_ptr<void> *) {\n";
644644
filterInvocation << " ROOT::Internal::RDF::JitFilterHelper(" << funcName << ", "
645645
<< "colNames, "
646-
<< "\"" << name << "\", "
647646
<< "colRegister, "
648647
<< "lm, "
649648
<< "reinterpret_cast<ROOT::Detail::RDF::RJittedFilter*>(jittedFilter)"
@@ -682,7 +681,6 @@ std::shared_ptr<RJittedDefine> BookDefineJit(std::string_view name, std::string_
682681
defineInvocation << " ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefineTag>("
683682
<< funcName << ", "
684683
<< "colNames, "
685-
<< "\"" << name << "\", "
686684
<< "colRegister, "
687685
<< "lm, "
688686
<< "reinterpret_cast<ROOT::Detail::RDF::RJittedDefine *>(jittedDefine)"
@@ -716,7 +714,6 @@ std::shared_ptr<RJittedDefine> BookDefinePerSampleJit(std::string_view name, std
716714
defineInvocation << " ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefinePerSampleTag>("
717715
<< funcName << ", "
718716
<< "colNames, "
719-
<< "\"" << name << "\", "
720717
<< "colRegister, "
721718
<< "lm, "
722719
<< "reinterpret_cast<ROOT::Detail::RDF::RJittedDefine *>(jittedDefine)"
@@ -768,7 +765,6 @@ BookVariationJit(const std::vector<std::string> &colNames, std::string_view vari
768765
<< " ROOT::Internal::RDF::JitVariationHelper<" << (isSingleColumn ? "true" : "false") << ">(" << funcName
769766
<< ", "
770767
<< "inputColNames, "
771-
<< "\"" << variationName << "\", "
772768
<< "colRegister, "
773769
<< "lm, "
774770
<< "reinterpret_cast<ROOT::Internal::RDF::RJittedVariation *>(jittedVariation), "

tree/dataframe/src/RVariationBase.cxx

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,19 @@ namespace RDF {
2727
RVariationBase::RVariationBase(const std::vector<std::string> &colNames, std::string_view variationName,
2828
const std::vector<std::string> &variationTags, std::string_view type,
2929
const RColumnRegister &colRegister, RLoopManager &lm, const ColumnNames_t &inputColNames)
30-
: fColNames(colNames), fVariationNames(variationTags), fType(type),
31-
fLastCheckedEntry(lm.GetNSlots() * CacheLineStep<Long64_t>(), -1), fColumnRegister(colRegister), fLoopManager(&lm),
32-
fInputColumns(inputColNames), fIsDefine(inputColNames.size())
30+
: fColNames(colNames),
31+
fVariationNames(variationTags),
32+
fVariationName(variationName),
33+
fType(type),
34+
fLastCheckedEntry(lm.GetNSlots() * CacheLineStep<Long64_t>(), -1),
35+
fColumnRegister(colRegister),
36+
fLoopManager(&lm),
37+
fInputColumns(inputColNames),
38+
fIsDefine(inputColNames.size())
3339
{
3440
// prepend the variation name to each tag
3541
for (auto &tag : fVariationNames)
36-
tag = std::string(variationName) + ':' + tag;
42+
tag = std::string(fVariationName) + ':' + tag;
3743

3844
const auto nColumns = fInputColumns.size();
3945
for (auto i = 0u; i < nColumns; ++i)
@@ -52,6 +58,11 @@ const std::vector<std::string> &RVariationBase::GetVariationNames() const
5258
return fVariationNames;
5359
}
5460

61+
const std::string &RVariationBase::GetVariationName() const
62+
{
63+
return fVariationName;
64+
}
65+
5566
std::string RVariationBase::GetTypeName() const
5667
{
5768
return fType;

0 commit comments

Comments
 (0)