Skip to content

Commit 06fe2b7

Browse files
committed
Add RDF tutorial df103 as cpp and py benchmark
1 parent d846d51 commit 06fe2b7

File tree

3 files changed

+755
-0
lines changed

3 files changed

+755
-0
lines changed

root/tree/dataframe/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,17 @@ if(rootbench-datafiles)
2626
DOWNLOAD_DATAFILES Run2012BC_DoubleMuParked_Muons.root
2727
LABEL long)
2828

29+
RB_ADD_GBENCHMARK(gbenchmark-df103_NanoAODHiggsAnalysis
30+
df103_NanoAODHiggsAnalysis.C
31+
DOWNLOAD_DATAFILES Run2012B_DoubleMuParked.root Run2012C_DoubleMuParked.root Run2012B_DoubleElectron.root Run2012C_DoubleElectron.root SMHiggsToZZTo4L.root ZZTo4mu.root ZZTo4e.root
32+
LABEL long
33+
LIBRARIES Core Hist Imt RIO Tree TreePlayer ROOTDataFrame ROOTVecOps Gpad Graf)
34+
35+
RB_ADD_PYTESTBENCHMARK(pytest-df103_NanoAODHiggsAnalysis
36+
df103_NanoAODHiggsAnalysis.py
37+
DOWNLOAD_DATAFILES Run2012B_DoubleMuParked.root Run2012C_DoubleMuParked.root Run2012B_DoubleElectron.root Run2012C_DoubleElectron.root SMHiggsToZZTo4L.root ZZTo4mu.root ZZTo4e.root
38+
LABEL long)
39+
2940
RB_ADD_PYTESTBENCHMARK(pytest-df104_HiggsToTwoPhotons
3041
df104_HiggsToTwoPhotons.py
3142
DOWNLOAD_DATAFILES data_A.GamGam.root data_B.GamGam.root data_C.GamGam.root data_D.GamGam.root mc_343981.ggH125_gamgam.GamGam.root mc_345041.VBFH125_gamgam.GamGam.root
Lines changed: 369 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,369 @@
1+
// This test is an adaption of the RDataFrame tutorial df103 using only the 4el and 4mu channels
2+
3+
#include "ROOT/RDataFrame.hxx"
4+
#include "ROOT/RVec.hxx"
5+
#include "ROOT/RDF/RInterface.hxx"
6+
#include "TCanvas.h"
7+
#include "TH1D.h"
8+
#include "TLatex.h"
9+
#include "TLegend.h"
10+
#include <Math/Vector4Dfwd.h>
11+
#include <Math/GenVector/LorentzVector.h>
12+
#include <Math/GenVector/PtEtaPhiM4D.h>
13+
#include "TStyle.h"
14+
#include <string>
15+
16+
#include "benchmark/benchmark.h"
17+
#include "rootbench/RBConfig.h"
18+
19+
using namespace ROOT::VecOps;
20+
using RNode = ROOT::RDF::RNode;
21+
using rvec_f = const RVec<float> &;
22+
using rvec_i = const RVec<int> &;
23+
24+
const auto z_mass = 91.2;
25+
26+
// Select interesting events with four muons
27+
RNode selection_4mu(RNode df)
28+
{
29+
auto df_ge4m = df.Filter("nMuon>=4", "At least four muons");
30+
auto df_iso = df_ge4m.Filter("All(abs(Muon_pfRelIso04_all)<0.40)", "Require good isolation");
31+
auto df_kin = df_iso.Filter("All(Muon_pt>5) && All(abs(Muon_eta)<2.4)", "Good muon kinematics");
32+
auto df_ip3d = df_kin.Define("Muon_ip3d", "sqrt(Muon_dxy*Muon_dxy + Muon_dz*Muon_dz)");
33+
auto df_sip3d = df_ip3d.Define("Muon_sip3d", "Muon_ip3d/sqrt(Muon_dxyErr*Muon_dxyErr + Muon_dzErr*Muon_dzErr)");
34+
auto df_pv = df_sip3d.Filter("All(Muon_sip3d<4) && All(abs(Muon_dxy)<0.5) && All(abs(Muon_dz)<1.0)",
35+
"Track close to primary vertex with small uncertainty");
36+
auto df_2p2n = df_pv.Filter("nMuon==4 && Sum(Muon_charge==1)==2 && Sum(Muon_charge==-1)==2",
37+
"Two positive and two negative muons");
38+
return df_2p2n;
39+
}
40+
41+
// Select interesting events with four electrons
42+
RNode selection_4el(RNode df)
43+
{
44+
auto df_ge4el = df.Filter("nElectron>=4", "At least our electrons");
45+
auto df_iso = df_ge4el.Filter("All(abs(Electron_pfRelIso03_all)<0.40)", "Require good isolation");
46+
auto df_kin = df_iso.Filter("All(Electron_pt>7) && All(abs(Electron_eta)<2.5)", "Good Electron kinematics");
47+
auto df_ip3d = df_kin.Define("Electron_ip3d", "sqrt(Electron_dxy*Electron_dxy + Electron_dz*Electron_dz)");
48+
auto df_sip3d = df_ip3d.Define("Electron_sip3d",
49+
"Electron_ip3d/sqrt(Electron_dxyErr*Electron_dxyErr + Electron_dzErr*Electron_dzErr)");
50+
auto df_pv = df_sip3d.Filter("All(Electron_sip3d<4) && All(abs(Electron_dxy)<0.5) && "
51+
"All(abs(Electron_dz)<1.0)",
52+
"Track close to primary vertex with small uncertainty");
53+
auto df_2p2n = df_pv.Filter("nElectron==4 && Sum(Electron_charge==1)==2 && Sum(Electron_charge==-1)==2",
54+
"Two positive and two negative electrons");
55+
return df_2p2n;
56+
}
57+
58+
// Reconstruct two Z candidates from four leptons of the same kind
59+
RVec<RVec<size_t>> reco_zz_to_4l(rvec_f pt, rvec_f eta, rvec_f phi, rvec_f mass, rvec_i charge)
60+
{
61+
RVec<RVec<size_t>> idx(2);
62+
idx[0].reserve(2); idx[1].reserve(2);
63+
64+
// Find first lepton pair with invariant mass closest to Z mass
65+
auto idx_cmb = Combinations(pt, 2);
66+
auto best_mass = -1;
67+
size_t best_i1 = 0; size_t best_i2 = 0;
68+
for (size_t i = 0; i < idx_cmb[0].size(); i++) {
69+
const auto i1 = idx_cmb[0][i];
70+
const auto i2 = idx_cmb[1][i];
71+
if (charge[i1] != charge[i2]) {
72+
ROOT::Math::PtEtaPhiMVector p1(pt[i1], eta[i1], phi[i1], mass[i1]);
73+
ROOT::Math::PtEtaPhiMVector p2(pt[i2], eta[i2], phi[i2], mass[i2]);
74+
const auto this_mass = (p1 + p2).M();
75+
if (std::abs(z_mass - this_mass) < std::abs(z_mass - best_mass)) {
76+
best_mass = this_mass;
77+
best_i1 = i1;
78+
best_i2 = i2;
79+
}
80+
}
81+
}
82+
idx[0].emplace_back(best_i1);
83+
idx[0].emplace_back(best_i2);
84+
85+
// Reconstruct second Z from remaining lepton pair
86+
for (size_t i = 0; i < 4; i++) {
87+
if (i != best_i1 && i != best_i2) {
88+
idx[1].emplace_back(i);
89+
}
90+
}
91+
92+
// Return indices of the pairs building two Z bosons
93+
return idx;
94+
}
95+
96+
// Compute Z masses from four leptons of the same kind and sort ascending in distance to Z mass
97+
RVec<float> compute_z_masses_4l(const RVec<RVec<size_t>> &idx, rvec_f pt, rvec_f eta, rvec_f phi, rvec_f mass)
98+
{
99+
RVec<float> z_masses(2);
100+
for (size_t i = 0; i < 2; i++) {
101+
const auto i1 = idx[i][0]; const auto i2 = idx[i][1];
102+
ROOT::Math::PtEtaPhiMVector p1(pt[i1], eta[i1], phi[i1], mass[i1]);
103+
ROOT::Math::PtEtaPhiMVector p2(pt[i2], eta[i2], phi[i2], mass[i2]);
104+
z_masses[i] = (p1 + p2).M();
105+
}
106+
if (std::abs(z_masses[0] - z_mass) < std::abs(z_masses[1] - z_mass)) {
107+
return z_masses;
108+
} else {
109+
return Reverse(z_masses);
110+
}
111+
}
112+
113+
// Compute mass of Higgs from four leptons of the same kind
114+
float compute_higgs_mass_4l(const RVec<RVec<size_t>> &idx, rvec_f pt, rvec_f eta, rvec_f phi, rvec_f mass)
115+
{
116+
const auto i1 = idx[0][0]; const auto i2 = idx[0][1];
117+
const auto i3 = idx[1][0]; const auto i4 = idx[1][1];
118+
ROOT::Math::PtEtaPhiMVector p1(pt[i1], eta[i1], phi[i1], mass[i1]);
119+
ROOT::Math::PtEtaPhiMVector p2(pt[i2], eta[i2], phi[i2], mass[i2]);
120+
ROOT::Math::PtEtaPhiMVector p3(pt[i3], eta[i3], phi[i3], mass[i3]);
121+
ROOT::Math::PtEtaPhiMVector p4(pt[i4], eta[i4], phi[i4], mass[i4]);
122+
return (p1 + p2 + p3 + p4).M();
123+
}
124+
125+
// Apply selection on reconstructed Z candidates
126+
RNode filter_z_candidates(RNode df)
127+
{
128+
auto df_z1_cut = df.Filter("Z_mass[0] > 40 && Z_mass[0] < 120", "Mass of first Z candidate in [40, 120]");
129+
auto df_z2_cut = df_z1_cut.Filter("Z_mass[1] > 12 && Z_mass[1] < 120", "Mass of second Z candidate in [12, 120]");
130+
return df_z2_cut;
131+
}
132+
133+
// Reconstruct Higgs from four muons
134+
RNode reco_higgs_to_4mu(RNode df)
135+
{
136+
// Filter interesting events
137+
auto df_base = selection_4mu(df);
138+
139+
// Reconstruct Z systems
140+
auto df_z_idx =
141+
df_base.Define("Z_idx", reco_zz_to_4l, {"Muon_pt", "Muon_eta", "Muon_phi", "Muon_mass", "Muon_charge"});
142+
143+
// Cut on distance between muons building Z systems
144+
auto filter_z_dr = [](const RVec<RVec<size_t>> &idx, rvec_f eta, rvec_f phi) {
145+
for (size_t i = 0; i < 2; i++) {
146+
const auto i1 = idx[i][0];
147+
const auto i2 = idx[i][1];
148+
const auto dr = DeltaR(eta[i1], eta[i2], phi[i1], phi[i2]);
149+
if (dr < 0.02) {
150+
return false;
151+
}
152+
}
153+
return true;
154+
};
155+
auto df_z_dr =
156+
df_z_idx.Filter(filter_z_dr, {"Z_idx", "Muon_eta", "Muon_phi"}, "Delta R separation of muons building Z system");
157+
158+
// Compute masses of Z systems
159+
auto df_z_mass =
160+
df_z_dr.Define("Z_mass", compute_z_masses_4l, {"Z_idx", "Muon_pt", "Muon_eta", "Muon_phi", "Muon_mass"});
161+
162+
// Cut on mass of Z candidates
163+
auto df_z_cut = filter_z_candidates(df_z_mass);
164+
165+
// Reconstruct H mass
166+
auto df_h_mass =
167+
df_z_cut.Define("H_mass", compute_higgs_mass_4l, {"Z_idx", "Muon_pt", "Muon_eta", "Muon_phi", "Muon_mass"});
168+
169+
return df_h_mass;
170+
}
171+
172+
// Reconstruct Higgs from four electrons
173+
RNode reco_higgs_to_4el(RNode df)
174+
{
175+
// Filter interesting events
176+
auto df_base = selection_4el(df);
177+
178+
// Reconstruct Z systems
179+
auto df_z_idx = df_base.Define("Z_idx", reco_zz_to_4l,
180+
{"Electron_pt", "Electron_eta", "Electron_phi", "Electron_mass", "Electron_charge"});
181+
182+
// Cut on distance between Electrons building Z systems
183+
auto filter_z_dr = [](const RVec<RVec<size_t>> &idx, rvec_f eta, rvec_f phi) {
184+
for (size_t i = 0; i < 2; i++) {
185+
const auto i1 = idx[i][0];
186+
const auto i2 = idx[i][1];
187+
const auto dr = DeltaR(eta[i1], eta[i2], phi[i1], phi[i2]);
188+
if (dr < 0.02) {
189+
return false;
190+
}
191+
}
192+
return true;
193+
};
194+
auto df_z_dr = df_z_idx.Filter(filter_z_dr, {"Z_idx", "Electron_eta", "Electron_phi"},
195+
"Delta R separation of Electrons building Z system");
196+
197+
// Compute masses of Z systems
198+
auto df_z_mass = df_z_dr.Define("Z_mass", compute_z_masses_4l,
199+
{"Z_idx", "Electron_pt", "Electron_eta", "Electron_phi", "Electron_mass"});
200+
201+
// Cut on mass of Z candidates
202+
auto df_z_cut = filter_z_candidates(df_z_mass);
203+
204+
// Reconstruct H mass
205+
auto df_h_mass = df_z_cut.Define("H_mass", compute_higgs_mass_4l,
206+
{"Z_idx", "Electron_pt", "Electron_eta", "Electron_phi", "Electron_mass"});
207+
208+
return df_h_mass;
209+
}
210+
211+
// Plot invariant mass for signal and background processes from simulated events
212+
// overlay the measured data.
213+
template <typename T>
214+
void plot(T sig, T bkg, T data, const std::string &x_label, const std::string &filename)
215+
{
216+
// Canvas and general style options
217+
gStyle->SetOptStat(0);
218+
gStyle->SetTextFont(42);
219+
auto c = new TCanvas("c", "", 800, 700);
220+
c->SetLeftMargin(0.15);
221+
222+
// Get signal and background histograms and stack them to show Higgs signal
223+
// on top of the background process
224+
auto h_sig = *sig;
225+
auto h_bkg = *bkg;
226+
auto h_cmb = *(TH1D*)(sig->Clone());
227+
h_cmb.Add(&h_bkg);
228+
h_cmb.SetTitle("");
229+
h_cmb.GetXaxis()->SetTitle(x_label.c_str());
230+
h_cmb.GetXaxis()->SetTitleSize(0.04);
231+
h_cmb.GetYaxis()->SetTitle("N_{Events}");
232+
h_cmb.GetYaxis()->SetTitleSize(0.04);
233+
h_cmb.SetLineColor(kRed);
234+
h_cmb.SetLineWidth(2);
235+
h_cmb.SetMaximum(18);
236+
237+
h_bkg.SetLineWidth(2);
238+
h_bkg.SetFillStyle(1001);
239+
h_bkg.SetLineColor(kBlack);
240+
h_bkg.SetFillColor(kAzure - 9);
241+
242+
// Get histogram of data points
243+
auto h_data = *data;
244+
h_data.SetLineWidth(1);
245+
h_data.SetMarkerStyle(20);
246+
h_data.SetMarkerSize(1.0);
247+
h_data.SetMarkerColor(kBlack);
248+
h_data.SetLineColor(kBlack);
249+
250+
// Draw histograms
251+
h_cmb.DrawClone("HIST");
252+
h_bkg.DrawClone("HIST SAME");
253+
h_data.DrawClone("PE1 SAME");
254+
255+
// Add legend
256+
TLegend legend(0.62, 0.70, 0.82, 0.88);
257+
legend.SetFillColor(0);
258+
legend.SetBorderSize(0);
259+
legend.SetTextSize(0.03);
260+
legend.AddEntry(&h_data, "Data", "PE1");
261+
legend.AddEntry(&h_bkg, "ZZ", "f");
262+
legend.AddEntry(&h_cmb, "m_{H} = 125 GeV", "f");
263+
legend.Draw();
264+
265+
// Add header
266+
TLatex cms_label;
267+
cms_label.SetTextSize(0.04);
268+
cms_label.DrawLatexNDC(0.16, 0.92, "#bf{CMS Open Data}");
269+
TLatex header;
270+
header.SetTextSize(0.03);
271+
header.DrawLatexNDC(0.63, 0.92, "#sqrt{s} = 8 TeV, L_{int} = 11.6 fb^{-1}");
272+
273+
// Save plot
274+
c->SaveAs(filename.c_str());
275+
}
276+
277+
void payload(unsigned int nthreads)
278+
{
279+
#ifdef R__USE_IMT
280+
// Enable multi-threading
281+
if (nthreads) {
282+
ROOT::EnableImplicitMT(nthreads);
283+
} else {
284+
ROOT::DisableImplicitMT();
285+
}
286+
#endif // R__USE_IMT
287+
288+
// Take data files from the rootbench data directory
289+
std::string path = RB::GetDataDir();
290+
291+
// Create dataframes for signal, background and data samples
292+
293+
// Signal: Higgs -> 4 leptons
294+
ROOT::RDataFrame df_sig_4l("Events", path + "/SMHiggsToZZTo4L.root");
295+
296+
// Background: ZZ -> 4 leptons
297+
// Note that additional background processes from the original paper with minor contribution were left out for this
298+
// tutorial.
299+
ROOT::RDataFrame df_bkg_4mu("Events", path + "/ZZTo4mu.root");
300+
ROOT::RDataFrame df_bkg_4el("Events", path + "/ZZTo4e.root");
301+
302+
// CMS data taken in 2012 (11.6 fb^-1 integrated luminosity)
303+
ROOT::RDataFrame df_data_doublemu(
304+
"Events", {path + "/Run2012B_DoubleMuParked.root", path + "/Run2012C_DoubleMuParked.root"});
305+
ROOT::RDataFrame df_data_doubleel(
306+
"Events", {path + "/Run2012B_DoubleElectron.root", path + "/Run2012C_DoubleElectron.root"});
307+
308+
// Reconstruct Higgs to 4 muons
309+
auto df_sig_4mu_reco = reco_higgs_to_4mu(df_sig_4l);
310+
const auto luminosity = 11580.0; // Integrated luminosity of the data samples
311+
const auto xsec_SMHiggsToZZTo4L = 0.0065; // H->4l: Standard Model cross-section
312+
const auto nevt_SMHiggsToZZTo4L = 299973.0; // H->4l: Number of simulated events
313+
const auto nbins = 36; // Number of bins for the invariant mass spectrum
314+
auto df_h_sig_4mu = df_sig_4mu_reco
315+
.Define("weight", [&]() { return luminosity * xsec_SMHiggsToZZTo4L / nevt_SMHiggsToZZTo4L; }, {})
316+
.Histo1D({"h_sig_4mu", "", nbins, 70, 180}, "H_mass", "weight");
317+
318+
const auto scale_ZZTo4l = 1.386; // ZZ->4mu: Scale factor for ZZ to four leptons
319+
const auto xsec_ZZTo4mu = 0.077; // ZZ->4mu: Standard Model cross-section
320+
const auto nevt_ZZTo4mu = 1499064.0; // ZZ->4mu: Number of simulated events
321+
auto df_bkg_4mu_reco = reco_higgs_to_4mu(df_bkg_4mu);
322+
auto df_h_bkg_4mu = df_bkg_4mu_reco
323+
.Define("weight", [&]() { return luminosity * xsec_ZZTo4mu * scale_ZZTo4l / nevt_ZZTo4mu; }, {})
324+
.Histo1D({"h_bkg_4mu", "", nbins, 70, 180}, "H_mass", "weight");
325+
326+
auto df_data_4mu_reco = reco_higgs_to_4mu(df_data_doublemu);
327+
auto df_h_data_4mu = df_data_4mu_reco
328+
.Define("weight", []() { return 1.0; }, {})
329+
.Histo1D({"h_data_4mu", "", nbins, 70, 180}, "H_mass", "weight");
330+
331+
// Reconstruct Higgs to 4 electrons
332+
auto df_sig_4el_reco = reco_higgs_to_4el(df_sig_4l);
333+
auto df_h_sig_4el = df_sig_4el_reco
334+
.Define("weight", [&]() { return luminosity * xsec_SMHiggsToZZTo4L / nevt_SMHiggsToZZTo4L; }, {})
335+
.Histo1D({"h_sig_4el", "", nbins, 70, 180}, "H_mass", "weight");
336+
337+
const auto xsec_ZZTo4el = xsec_ZZTo4mu; // ZZ->4el: Standard Model cross-section
338+
const auto nevt_ZZTo4el = 1499093.0; // ZZ->4el: Number of simulated events
339+
auto df_bkg_4el_reco = reco_higgs_to_4el(df_bkg_4el);
340+
auto df_h_bkg_4el = df_bkg_4el_reco
341+
.Define("weight", [&]() { return luminosity * xsec_ZZTo4el * scale_ZZTo4l / nevt_ZZTo4el; }, {})
342+
.Histo1D({"h_bkg_4el", "", nbins, 70, 180}, "H_mass", "weight");
343+
344+
auto df_data_4el_reco = reco_higgs_to_4el(df_data_doubleel);
345+
auto df_h_data_4el = df_data_4el_reco.Define("weight", []() { return 1.0; }, {})
346+
.Histo1D({"h_data_4el", "", nbins, 70, 180}, "H_mass", "weight");
347+
348+
// Produce histograms for different channels and make plots
349+
plot(df_h_sig_4mu, df_h_bkg_4mu, df_h_data_4mu, "m_{4#mu} (GeV)", "higgs_4mu.pdf");
350+
plot(df_h_sig_4el, df_h_bkg_4el, df_h_data_4el, "m_{4e} (GeV)", "higgs_4el.pdf");
351+
}
352+
353+
static void df103_NanoAODHiggsAnalysis_noimt(benchmark::State &state)
354+
{
355+
for (auto _ : state)
356+
payload(0);
357+
}
358+
359+
static void df103_NanoAODHiggsAnalysis_imt(benchmark::State &state)
360+
{
361+
for (auto _ : state) {
362+
const auto nthreads = state.range(0);
363+
payload(nthreads);
364+
}
365+
}
366+
367+
BENCHMARK(df103_NanoAODHiggsAnalysis_noimt)->Repetitions(1);
368+
BENCHMARK(df103_NanoAODHiggsAnalysis_imt)->Repetitions(1)->Arg(8);
369+
BENCHMARK_MAIN();

0 commit comments

Comments
 (0)