Skip to content

Commit 2dc399f

Browse files
hageboeckguitargeek
andcommitted
[RF] Include evaluateSpan() fallback test in AddPdf benchmark.
When a PDF doesn't provide a fast batch evaluation function (i.e. it does not override evaluateSpan()), a slower fallback is used. The benchmark for Gauss + Exp using AddPdf is extended to include a test of this fallback. Further, fewer iterations are run to reduce the overall execution time, and the parameter sets are precomputed in order to avoid starting and stopping the benchmark timers. Co-authored-by: Jonas Rembser <[email protected]>
1 parent 586b6ff commit 2dc399f

File tree

2 files changed

+98
-21
lines changed

2 files changed

+98
-21
lines changed
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#include "RooAbsPdf.h"
2+
#include "RooRealProxy.h"
3+
4+
class RooRealVar;
5+
class RooAbsReal;
6+
7+
/// A slow implementation of RooExponential that doesn't support batch evaluation.
8+
class SlowRooExponential : public RooAbsPdf {
9+
public:
10+
SlowRooExponential() {}
11+
SlowRooExponential(const char *name, const char *title, RooAbsReal& _x, RooAbsReal& _c) :
12+
RooAbsPdf(name, title),
13+
x("x","Dependent",this,_x),
14+
c("c","Exponent",this,_c) { }
15+
16+
SlowRooExponential(const SlowRooExponential& other, const char* name) :
17+
RooAbsPdf(other, name), x("x",this,other.x), c("c",this,other.c) { }
18+
19+
virtual TObject* clone(const char* newname) const override { return new SlowRooExponential(*this,newname); }
20+
inline virtual ~SlowRooExponential() { }
21+
22+
Int_t getAnalyticalIntegral(RooArgSet& allVars, RooArgSet& analVars, const char* rangeName=0) const override;
23+
Double_t analyticalIntegral(Int_t code, const char* rangeName=0) const override;
24+
25+
protected:
26+
RooRealProxy x;
27+
RooRealProxy c;
28+
29+
Double_t evaluate() const override;
30+
};
31+
32+
#include "RooRealVar.h"
33+
#include "RooBatchCompute.h"
34+
35+
#include <cmath>
36+
using namespace std;
37+
38+
/// Return exp(c * x) for the current values of the two proxies.
///
/// Declared `inline` because this definition sits in the same file as the
/// class declaration; without it, every translation unit including the file
/// would emit its own strong definition and linking would fail.
inline Double_t SlowRooExponential::evaluate() const
{
   return std::exp(c * x);
}
41+
42+
/// Report which analytical integral this PDF can provide.
///
/// \param allVars  Variables the caller would like to integrate over.
/// \param analVars Passed on to matchArgs() together with the matching proxy.
/// \return 1 if the integral over x is selected, 2 if the integral over c is
///         selected, 0 if neither matches. The range name is ignored here.
///
/// Declared `inline` because this definition sits in the same file as the
/// class declaration and must not be emitted once per including translation unit.
inline Int_t SlowRooExponential::getAnalyticalIntegral(RooArgSet& allVars, RooArgSet& analVars, const char* /*rangeName*/) const
{
   // x is tried first; c is only considered when x did not match.
   if (matchArgs(allVars, analVars, x)) {
      return 1;
   }
   if (matchArgs(allVars, analVars, c)) {
      return 2;
   }
   return 0;
}
48+
49+
/// Integrate exp(c * x) analytically over one of its two variables.
///
/// \param code      1 to integrate over x (c held constant), 2 to integrate
///                  over c (x held constant); anything else trips the assert.
/// \param rangeName Range whose limits are read from the integration variable.
/// \return (exp(k * max) - exp(k * min)) / k, where k is the variable held
///         constant, or simply max - min when k is exactly zero.
///
/// Declared `inline` because this definition sits in the same file as the
/// class declaration and must not be emitted once per including translation unit.
inline Double_t SlowRooExponential::analyticalIntegral(Int_t code, const char* rangeName) const
{
   assert(code == 1 || code == 2);

   auto& constant = code == 1 ? c : x;
   auto& integrand = code == 1 ? x : c;

   const auto lower = integrand.min(rangeName);
   const auto upper = integrand.max(rangeName);

   // Exact comparison on purpose: only a true zero would make the division
   // below ill-defined, and in that case the integrand is identically 1.
   if (constant == 0.0) {
      return upper - lower;
   }

   return (std::exp(constant * upper) - std::exp(constant * lower)) / constant;
}

root/roofit/vectorisedPDFs/benchAddPdf.cxx

Lines changed: 36 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
* Benchmark a simple mock fit model
2020
* sum(x) = frac * Gauss(x) + (1-frac) * Exponential(x)
2121
*
22-
* Run 6 different workflows:
23-
* 0. Evaluate fit model for 2 M data events with batch data loading and SIMD (if compiler flags activated).
22+
* We can run 6 different workflows:
23+
* 0. Evaluate fit model for 1 M data events with batch data loading and SIMD (if compiler flags activated).
2424
* 1. As above, but use old RooFit strategy of single-value data loading.
2525
* 2. Compute probabilities for each data event. That is, run step 0 and normalise values.
2626
* 3. As above, but use old RooFit strategy.
@@ -38,8 +38,10 @@
3838
#include "RooExponential.h"
3939
#include "RooDataSet.h"
4040
#include "RunContext.h"
41-
4241
#include "RooRandom.h"
42+
43+
#include "SlowRooExponential.h"
44+
4345
void randomiseParameters(const RooArgSet& parameters, ULong_t seed=0) {
4446
auto random = RooRandom::randomGenerator();
4547
if (seed != 0)
@@ -61,9 +63,10 @@ enum RunConfig_t {runBatchUnnorm = 0, runSingleUnnorm = 1,
6163

6264

6365
static void benchAddPdfGaussExp(benchmark::State& state) {
64-
RunConfig_t runConfig = static_cast<RunConfig_t>(state.range(0));
65-
constexpr std::size_t nParamSets = 30;
66-
constexpr std::size_t nEvents = 2000000;
66+
const RunConfig_t runConfig = static_cast<RunConfig_t>(state.range(0));
67+
const bool useSlowRooExponential = state.range(1);
68+
constexpr std::size_t nParamSets = 3;
69+
constexpr std::size_t nEvents = 1000000;
6770

6871
// Declare variables x,mean,sigma with associated name, title, initial value and allowed range
6972
RooRealVar x("x", "x", -1.5, 40.5);
@@ -75,10 +78,15 @@ static void benchAddPdfGaussExp(benchmark::State& state) {
7578
RooGaussian gauss("gauss", "gaussian PDF", x, mean, sigma);
7679

7780
RooRealVar c1("c1", "Decay constant", -0.5, -10, -0.001);
78-
RooExponential ex("Pois", "Poisson PDF", x, c1);
81+
std::unique_ptr<RooAbsPdf> ex;
82+
if (useSlowRooExponential) {
83+
ex = std::make_unique<SlowRooExponential>("Pois", "Poisson PDF", x, c1);
84+
} else {
85+
ex = std::make_unique<RooExponential>("Pois", "Poisson PDF", x, c1);
86+
}
7987

8088
RooRealVar fractionGaus("fractionGaus", "Fraction of Gauss component", 0.5, 0., 1.);
81-
RooAddPdf pdf("SumGausPois", "Sum of Gaus and Poisson", RooArgSet(gauss, ex), fractionGaus);
89+
RooAddPdf pdf("SumGausPois", "Sum of Gaus and Poisson", RooArgSet(gauss, *ex), fractionGaus);
8290
// to avoid a warning when computing the unnormalized RooAddPdf values
8391
pdf.fixCoefNormalization(x);
8492

@@ -92,15 +100,19 @@ static void benchAddPdfGaussExp(benchmark::State& state) {
92100
RooBatchCompute::RunContext evalData;
93101
std::vector<double> results(nEvents);
94102

103+
std::array<RooArgSet, nParamSets> paramSets;
104+
unsigned int seed = 1337;
105+
for (auto& paramSet : paramSets) {
106+
randomiseParameters(parameters, seed++);
107+
parameters.snapshot(paramSet);
108+
}
109+
95110
for (auto _ : state) {
96-
for (unsigned int paramSetIndex=0; paramSetIndex < nParamSets; ++paramSetIndex) {
97-
state.PauseTiming();
98-
randomiseParameters(parameters, 1337+paramSetIndex);
99-
state.ResumeTiming();
111+
for (const auto& paramSet : paramSets) {
112+
parameters = paramSet;
100113

101114
evalData.clear();
102115
data->getBatches(evalData, 0, data->numEntries());
103-
runConfig = static_cast<RunConfig_t>(runConfig % 6);
104116

105117
if (runConfig == runBatchUnnorm) {
106118
auto batchResult = pdf.getValues(evalData, nullptr);
@@ -134,14 +146,17 @@ static void benchAddPdfGaussExp(benchmark::State& state) {
134146
}
135147
};
136148

137-
BENCHMARK(benchAddPdfGaussExp)->Unit(benchmark::kMillisecond)
138-
->Args({runBatchUnnorm})
139-
->Args({runSingleUnnorm})
140-
->Args({runBatchNorm})
141-
->Args({runSingleNorm})
142-
->Args({runBatchNormLogs})
143-
->Args({runSingleNormLogs})
144-
;
149+
// Register the benchmark under the name "Gauss+Exp" for the four normalised
// run configurations. The second argument is read as useSlowRooExponential;
// `false` selects the regular RooExponential.
BENCHMARK(benchAddPdfGaussExp)->Name("Gauss+Exp")->Unit(benchmark::kMillisecond)
150+
->Args({runBatchNorm, false})
151+
->Args({runSingleNorm, false})
152+
->Args({runBatchNormLogs, false})
153+
->Args({runSingleNormLogs, false});
154+
// Register the same four run configurations again, this time with the second
// argument `true`, which makes the benchmark build the exponential component
// from SlowRooExponential instead of RooExponential.
BENCHMARK(benchAddPdfGaussExp)->Name("Gauss+Exp(evaluateSpan fallback)")->Unit(benchmark::kMillisecond)
155+
->Args({runBatchNorm, true})
156+
->Args({runSingleNorm, true})
157+
->Args({runBatchNormLogs, true})
158+
->Args({runSingleNormLogs, true});
159+
145160

146161

147162
BENCHMARK_MAIN();

0 commit comments

Comments
 (0)