|
| 1 | +// This test is an adaption of the RDataFrame tutorial df103 using only the 4el and 4mu channels |
| 2 | + |
| 3 | +#include "ROOT/RDataFrame.hxx" |
| 4 | +#include "ROOT/RVec.hxx" |
| 5 | +#include "ROOT/RDF/RInterface.hxx" |
| 6 | +#include "TCanvas.h" |
| 7 | +#include "TH1D.h" |
| 8 | +#include "TLatex.h" |
| 9 | +#include "TLegend.h" |
| 10 | +#include <Math/Vector4Dfwd.h> |
| 11 | +#include <Math/GenVector/LorentzVector.h> |
| 12 | +#include <Math/GenVector/PtEtaPhiM4D.h> |
| 13 | +#include "TStyle.h" |
| 14 | +#include <string> |
| 15 | + |
| 16 | +#include "benchmark/benchmark.h" |
| 17 | +#include "rootbench/RBConfig.h" |
| 18 | + |
| 19 | +using namespace ROOT::VecOps; |
| 20 | +using RNode = ROOT::RDF::RNode; |
| 21 | +using rvec_f = const RVec<float> &; |
| 22 | +using rvec_i = const RVec<int> &; |
| 23 | + |
| 24 | +const auto z_mass = 91.2; |
| 25 | + |
| 26 | +// Select interesting events with four muons |
| 27 | +RNode selection_4mu(RNode df) |
| 28 | +{ |
| 29 | + auto df_ge4m = df.Filter("nMuon>=4", "At least four muons"); |
| 30 | + auto df_iso = df_ge4m.Filter("All(abs(Muon_pfRelIso04_all)<0.40)", "Require good isolation"); |
| 31 | + auto df_kin = df_iso.Filter("All(Muon_pt>5) && All(abs(Muon_eta)<2.4)", "Good muon kinematics"); |
| 32 | + auto df_ip3d = df_kin.Define("Muon_ip3d", "sqrt(Muon_dxy*Muon_dxy + Muon_dz*Muon_dz)"); |
| 33 | + auto df_sip3d = df_ip3d.Define("Muon_sip3d", "Muon_ip3d/sqrt(Muon_dxyErr*Muon_dxyErr + Muon_dzErr*Muon_dzErr)"); |
| 34 | + auto df_pv = df_sip3d.Filter("All(Muon_sip3d<4) && All(abs(Muon_dxy)<0.5) && All(abs(Muon_dz)<1.0)", |
| 35 | + "Track close to primary vertex with small uncertainty"); |
| 36 | + auto df_2p2n = df_pv.Filter("nMuon==4 && Sum(Muon_charge==1)==2 && Sum(Muon_charge==-1)==2", |
| 37 | + "Two positive and two negative muons"); |
| 38 | + return df_2p2n; |
| 39 | +} |
| 40 | + |
| 41 | +// Select interesting events with four electrons |
| 42 | +RNode selection_4el(RNode df) |
| 43 | +{ |
| 44 | + auto df_ge4el = df.Filter("nElectron>=4", "At least our electrons"); |
| 45 | + auto df_iso = df_ge4el.Filter("All(abs(Electron_pfRelIso03_all)<0.40)", "Require good isolation"); |
| 46 | + auto df_kin = df_iso.Filter("All(Electron_pt>7) && All(abs(Electron_eta)<2.5)", "Good Electron kinematics"); |
| 47 | + auto df_ip3d = df_kin.Define("Electron_ip3d", "sqrt(Electron_dxy*Electron_dxy + Electron_dz*Electron_dz)"); |
| 48 | + auto df_sip3d = df_ip3d.Define("Electron_sip3d", |
| 49 | + "Electron_ip3d/sqrt(Electron_dxyErr*Electron_dxyErr + Electron_dzErr*Electron_dzErr)"); |
| 50 | + auto df_pv = df_sip3d.Filter("All(Electron_sip3d<4) && All(abs(Electron_dxy)<0.5) && " |
| 51 | + "All(abs(Electron_dz)<1.0)", |
| 52 | + "Track close to primary vertex with small uncertainty"); |
| 53 | + auto df_2p2n = df_pv.Filter("nElectron==4 && Sum(Electron_charge==1)==2 && Sum(Electron_charge==-1)==2", |
| 54 | + "Two positive and two negative electrons"); |
| 55 | + return df_2p2n; |
| 56 | +} |
| 57 | + |
| 58 | +// Reconstruct two Z candidates from four leptons of the same kind |
| 59 | +RVec<RVec<size_t>> reco_zz_to_4l(rvec_f pt, rvec_f eta, rvec_f phi, rvec_f mass, rvec_i charge) |
| 60 | +{ |
| 61 | + RVec<RVec<size_t>> idx(2); |
| 62 | + idx[0].reserve(2); idx[1].reserve(2); |
| 63 | + |
| 64 | + // Find first lepton pair with invariant mass closest to Z mass |
| 65 | + auto idx_cmb = Combinations(pt, 2); |
| 66 | + auto best_mass = -1; |
| 67 | + size_t best_i1 = 0; size_t best_i2 = 0; |
| 68 | + for (size_t i = 0; i < idx_cmb[0].size(); i++) { |
| 69 | + const auto i1 = idx_cmb[0][i]; |
| 70 | + const auto i2 = idx_cmb[1][i]; |
| 71 | + if (charge[i1] != charge[i2]) { |
| 72 | + ROOT::Math::PtEtaPhiMVector p1(pt[i1], eta[i1], phi[i1], mass[i1]); |
| 73 | + ROOT::Math::PtEtaPhiMVector p2(pt[i2], eta[i2], phi[i2], mass[i2]); |
| 74 | + const auto this_mass = (p1 + p2).M(); |
| 75 | + if (std::abs(z_mass - this_mass) < std::abs(z_mass - best_mass)) { |
| 76 | + best_mass = this_mass; |
| 77 | + best_i1 = i1; |
| 78 | + best_i2 = i2; |
| 79 | + } |
| 80 | + } |
| 81 | + } |
| 82 | + idx[0].emplace_back(best_i1); |
| 83 | + idx[0].emplace_back(best_i2); |
| 84 | + |
| 85 | + // Reconstruct second Z from remaining lepton pair |
| 86 | + for (size_t i = 0; i < 4; i++) { |
| 87 | + if (i != best_i1 && i != best_i2) { |
| 88 | + idx[1].emplace_back(i); |
| 89 | + } |
| 90 | + } |
| 91 | + |
| 92 | + // Return indices of the pairs building two Z bosons |
| 93 | + return idx; |
| 94 | +} |
| 95 | + |
| 96 | +// Compute Z masses from four leptons of the same kind and sort ascending in distance to Z mass |
| 97 | +RVec<float> compute_z_masses_4l(const RVec<RVec<size_t>> &idx, rvec_f pt, rvec_f eta, rvec_f phi, rvec_f mass) |
| 98 | +{ |
| 99 | + RVec<float> z_masses(2); |
| 100 | + for (size_t i = 0; i < 2; i++) { |
| 101 | + const auto i1 = idx[i][0]; const auto i2 = idx[i][1]; |
| 102 | + ROOT::Math::PtEtaPhiMVector p1(pt[i1], eta[i1], phi[i1], mass[i1]); |
| 103 | + ROOT::Math::PtEtaPhiMVector p2(pt[i2], eta[i2], phi[i2], mass[i2]); |
| 104 | + z_masses[i] = (p1 + p2).M(); |
| 105 | + } |
| 106 | + if (std::abs(z_masses[0] - z_mass) < std::abs(z_masses[1] - z_mass)) { |
| 107 | + return z_masses; |
| 108 | + } else { |
| 109 | + return Reverse(z_masses); |
| 110 | + } |
| 111 | +} |
| 112 | + |
| 113 | +// Compute mass of Higgs from four leptons of the same kind |
| 114 | +float compute_higgs_mass_4l(const RVec<RVec<size_t>> &idx, rvec_f pt, rvec_f eta, rvec_f phi, rvec_f mass) |
| 115 | +{ |
| 116 | + const auto i1 = idx[0][0]; const auto i2 = idx[0][1]; |
| 117 | + const auto i3 = idx[1][0]; const auto i4 = idx[1][1]; |
| 118 | + ROOT::Math::PtEtaPhiMVector p1(pt[i1], eta[i1], phi[i1], mass[i1]); |
| 119 | + ROOT::Math::PtEtaPhiMVector p2(pt[i2], eta[i2], phi[i2], mass[i2]); |
| 120 | + ROOT::Math::PtEtaPhiMVector p3(pt[i3], eta[i3], phi[i3], mass[i3]); |
| 121 | + ROOT::Math::PtEtaPhiMVector p4(pt[i4], eta[i4], phi[i4], mass[i4]); |
| 122 | + return (p1 + p2 + p3 + p4).M(); |
| 123 | +} |
| 124 | + |
| 125 | +// Apply selection on reconstructed Z candidates |
| 126 | +RNode filter_z_candidates(RNode df) |
| 127 | +{ |
| 128 | + auto df_z1_cut = df.Filter("Z_mass[0] > 40 && Z_mass[0] < 120", "Mass of first Z candidate in [40, 120]"); |
| 129 | + auto df_z2_cut = df_z1_cut.Filter("Z_mass[1] > 12 && Z_mass[1] < 120", "Mass of second Z candidate in [12, 120]"); |
| 130 | + return df_z2_cut; |
| 131 | +} |
| 132 | + |
| 133 | +// Reconstruct Higgs from four muons |
| 134 | +RNode reco_higgs_to_4mu(RNode df) |
| 135 | +{ |
| 136 | + // Filter interesting events |
| 137 | + auto df_base = selection_4mu(df); |
| 138 | + |
| 139 | + // Reconstruct Z systems |
| 140 | + auto df_z_idx = |
| 141 | + df_base.Define("Z_idx", reco_zz_to_4l, {"Muon_pt", "Muon_eta", "Muon_phi", "Muon_mass", "Muon_charge"}); |
| 142 | + |
| 143 | + // Cut on distance between muons building Z systems |
| 144 | + auto filter_z_dr = [](const RVec<RVec<size_t>> &idx, rvec_f eta, rvec_f phi) { |
| 145 | + for (size_t i = 0; i < 2; i++) { |
| 146 | + const auto i1 = idx[i][0]; |
| 147 | + const auto i2 = idx[i][1]; |
| 148 | + const auto dr = DeltaR(eta[i1], eta[i2], phi[i1], phi[i2]); |
| 149 | + if (dr < 0.02) { |
| 150 | + return false; |
| 151 | + } |
| 152 | + } |
| 153 | + return true; |
| 154 | + }; |
| 155 | + auto df_z_dr = |
| 156 | + df_z_idx.Filter(filter_z_dr, {"Z_idx", "Muon_eta", "Muon_phi"}, "Delta R separation of muons building Z system"); |
| 157 | + |
| 158 | + // Compute masses of Z systems |
| 159 | + auto df_z_mass = |
| 160 | + df_z_dr.Define("Z_mass", compute_z_masses_4l, {"Z_idx", "Muon_pt", "Muon_eta", "Muon_phi", "Muon_mass"}); |
| 161 | + |
| 162 | + // Cut on mass of Z candidates |
| 163 | + auto df_z_cut = filter_z_candidates(df_z_mass); |
| 164 | + |
| 165 | + // Reconstruct H mass |
| 166 | + auto df_h_mass = |
| 167 | + df_z_cut.Define("H_mass", compute_higgs_mass_4l, {"Z_idx", "Muon_pt", "Muon_eta", "Muon_phi", "Muon_mass"}); |
| 168 | + |
| 169 | + return df_h_mass; |
| 170 | +} |
| 171 | + |
| 172 | +// Reconstruct Higgs from four electrons |
| 173 | +RNode reco_higgs_to_4el(RNode df) |
| 174 | +{ |
| 175 | + // Filter interesting events |
| 176 | + auto df_base = selection_4el(df); |
| 177 | + |
| 178 | + // Reconstruct Z systems |
| 179 | + auto df_z_idx = df_base.Define("Z_idx", reco_zz_to_4l, |
| 180 | + {"Electron_pt", "Electron_eta", "Electron_phi", "Electron_mass", "Electron_charge"}); |
| 181 | + |
| 182 | + // Cut on distance between Electrons building Z systems |
| 183 | + auto filter_z_dr = [](const RVec<RVec<size_t>> &idx, rvec_f eta, rvec_f phi) { |
| 184 | + for (size_t i = 0; i < 2; i++) { |
| 185 | + const auto i1 = idx[i][0]; |
| 186 | + const auto i2 = idx[i][1]; |
| 187 | + const auto dr = DeltaR(eta[i1], eta[i2], phi[i1], phi[i2]); |
| 188 | + if (dr < 0.02) { |
| 189 | + return false; |
| 190 | + } |
| 191 | + } |
| 192 | + return true; |
| 193 | + }; |
| 194 | + auto df_z_dr = df_z_idx.Filter(filter_z_dr, {"Z_idx", "Electron_eta", "Electron_phi"}, |
| 195 | + "Delta R separation of Electrons building Z system"); |
| 196 | + |
| 197 | + // Compute masses of Z systems |
| 198 | + auto df_z_mass = df_z_dr.Define("Z_mass", compute_z_masses_4l, |
| 199 | + {"Z_idx", "Electron_pt", "Electron_eta", "Electron_phi", "Electron_mass"}); |
| 200 | + |
| 201 | + // Cut on mass of Z candidates |
| 202 | + auto df_z_cut = filter_z_candidates(df_z_mass); |
| 203 | + |
| 204 | + // Reconstruct H mass |
| 205 | + auto df_h_mass = df_z_cut.Define("H_mass", compute_higgs_mass_4l, |
| 206 | + {"Z_idx", "Electron_pt", "Electron_eta", "Electron_phi", "Electron_mass"}); |
| 207 | + |
| 208 | + return df_h_mass; |
| 209 | +} |
| 210 | + |
| 211 | +// Plot invariant mass for signal and background processes from simulated events |
| 212 | +// overlay the measured data. |
| 213 | +template <typename T> |
| 214 | +void plot(T sig, T bkg, T data, const std::string &x_label, const std::string &filename) |
| 215 | +{ |
| 216 | + // Canvas and general style options |
| 217 | + gStyle->SetOptStat(0); |
| 218 | + gStyle->SetTextFont(42); |
| 219 | + auto c = new TCanvas("c", "", 800, 700); |
| 220 | + c->SetLeftMargin(0.15); |
| 221 | + |
| 222 | + // Get signal and background histograms and stack them to show Higgs signal |
| 223 | + // on top of the background process |
| 224 | + auto h_sig = *sig; |
| 225 | + auto h_bkg = *bkg; |
| 226 | + auto h_cmb = *(TH1D*)(sig->Clone()); |
| 227 | + h_cmb.Add(&h_bkg); |
| 228 | + h_cmb.SetTitle(""); |
| 229 | + h_cmb.GetXaxis()->SetTitle(x_label.c_str()); |
| 230 | + h_cmb.GetXaxis()->SetTitleSize(0.04); |
| 231 | + h_cmb.GetYaxis()->SetTitle("N_{Events}"); |
| 232 | + h_cmb.GetYaxis()->SetTitleSize(0.04); |
| 233 | + h_cmb.SetLineColor(kRed); |
| 234 | + h_cmb.SetLineWidth(2); |
| 235 | + h_cmb.SetMaximum(18); |
| 236 | + |
| 237 | + h_bkg.SetLineWidth(2); |
| 238 | + h_bkg.SetFillStyle(1001); |
| 239 | + h_bkg.SetLineColor(kBlack); |
| 240 | + h_bkg.SetFillColor(kAzure - 9); |
| 241 | + |
| 242 | + // Get histogram of data points |
| 243 | + auto h_data = *data; |
| 244 | + h_data.SetLineWidth(1); |
| 245 | + h_data.SetMarkerStyle(20); |
| 246 | + h_data.SetMarkerSize(1.0); |
| 247 | + h_data.SetMarkerColor(kBlack); |
| 248 | + h_data.SetLineColor(kBlack); |
| 249 | + |
| 250 | + // Draw histograms |
| 251 | + h_cmb.DrawClone("HIST"); |
| 252 | + h_bkg.DrawClone("HIST SAME"); |
| 253 | + h_data.DrawClone("PE1 SAME"); |
| 254 | + |
| 255 | + // Add legend |
| 256 | + TLegend legend(0.62, 0.70, 0.82, 0.88); |
| 257 | + legend.SetFillColor(0); |
| 258 | + legend.SetBorderSize(0); |
| 259 | + legend.SetTextSize(0.03); |
| 260 | + legend.AddEntry(&h_data, "Data", "PE1"); |
| 261 | + legend.AddEntry(&h_bkg, "ZZ", "f"); |
| 262 | + legend.AddEntry(&h_cmb, "m_{H} = 125 GeV", "f"); |
| 263 | + legend.Draw(); |
| 264 | + |
| 265 | + // Add header |
| 266 | + TLatex cms_label; |
| 267 | + cms_label.SetTextSize(0.04); |
| 268 | + cms_label.DrawLatexNDC(0.16, 0.92, "#bf{CMS Open Data}"); |
| 269 | + TLatex header; |
| 270 | + header.SetTextSize(0.03); |
| 271 | + header.DrawLatexNDC(0.63, 0.92, "#sqrt{s} = 8 TeV, L_{int} = 11.6 fb^{-1}"); |
| 272 | + |
| 273 | + // Save plot |
| 274 | + c->SaveAs(filename.c_str()); |
| 275 | +} |
| 276 | + |
| 277 | +void payload(unsigned int nthreads) |
| 278 | +{ |
| 279 | +#ifdef R__USE_IMT |
| 280 | + // Enable multi-threading |
| 281 | + if (nthreads) { |
| 282 | + ROOT::EnableImplicitMT(nthreads); |
| 283 | + } else { |
| 284 | + ROOT::DisableImplicitMT(); |
| 285 | + } |
| 286 | +#endif // R__USE_IMT |
| 287 | + |
| 288 | + // Take data files from the rootbench data directory |
| 289 | + std::string path = RB::GetDataDir(); |
| 290 | + |
| 291 | + // Create dataframes for signal, background and data samples |
| 292 | + |
| 293 | + // Signal: Higgs -> 4 leptons |
| 294 | + ROOT::RDataFrame df_sig_4l("Events", path + "/SMHiggsToZZTo4L.root"); |
| 295 | + |
| 296 | + // Background: ZZ -> 4 leptons |
| 297 | + // Note that additional background processes from the original paper with minor contribution were left out for this |
| 298 | + // tutorial. |
| 299 | + ROOT::RDataFrame df_bkg_4mu("Events", path + "/ZZTo4mu.root"); |
| 300 | + ROOT::RDataFrame df_bkg_4el("Events", path + "/ZZTo4e.root"); |
| 301 | + |
| 302 | + // CMS data taken in 2012 (11.6 fb^-1 integrated luminosity) |
| 303 | + ROOT::RDataFrame df_data_doublemu( |
| 304 | + "Events", {path + "/Run2012B_DoubleMuParked.root", path + "/Run2012C_DoubleMuParked.root"}); |
| 305 | + ROOT::RDataFrame df_data_doubleel( |
| 306 | + "Events", {path + "/Run2012B_DoubleElectron.root", path + "/Run2012C_DoubleElectron.root"}); |
| 307 | + |
| 308 | + // Reconstruct Higgs to 4 muons |
| 309 | + auto df_sig_4mu_reco = reco_higgs_to_4mu(df_sig_4l); |
| 310 | + const auto luminosity = 11580.0; // Integrated luminosity of the data samples |
| 311 | + const auto xsec_SMHiggsToZZTo4L = 0.0065; // H->4l: Standard Model cross-section |
| 312 | + const auto nevt_SMHiggsToZZTo4L = 299973.0; // H->4l: Number of simulated events |
| 313 | + const auto nbins = 36; // Number of bins for the invariant mass spectrum |
| 314 | + auto df_h_sig_4mu = df_sig_4mu_reco |
| 315 | + .Define("weight", [&]() { return luminosity * xsec_SMHiggsToZZTo4L / nevt_SMHiggsToZZTo4L; }, {}) |
| 316 | + .Histo1D({"h_sig_4mu", "", nbins, 70, 180}, "H_mass", "weight"); |
| 317 | + |
| 318 | + const auto scale_ZZTo4l = 1.386; // ZZ->4mu: Scale factor for ZZ to four leptons |
| 319 | + const auto xsec_ZZTo4mu = 0.077; // ZZ->4mu: Standard Model cross-section |
| 320 | + const auto nevt_ZZTo4mu = 1499064.0; // ZZ->4mu: Number of simulated events |
| 321 | + auto df_bkg_4mu_reco = reco_higgs_to_4mu(df_bkg_4mu); |
| 322 | + auto df_h_bkg_4mu = df_bkg_4mu_reco |
| 323 | + .Define("weight", [&]() { return luminosity * xsec_ZZTo4mu * scale_ZZTo4l / nevt_ZZTo4mu; }, {}) |
| 324 | + .Histo1D({"h_bkg_4mu", "", nbins, 70, 180}, "H_mass", "weight"); |
| 325 | + |
| 326 | + auto df_data_4mu_reco = reco_higgs_to_4mu(df_data_doublemu); |
| 327 | + auto df_h_data_4mu = df_data_4mu_reco |
| 328 | + .Define("weight", []() { return 1.0; }, {}) |
| 329 | + .Histo1D({"h_data_4mu", "", nbins, 70, 180}, "H_mass", "weight"); |
| 330 | + |
| 331 | + // Reconstruct Higgs to 4 electrons |
| 332 | + auto df_sig_4el_reco = reco_higgs_to_4el(df_sig_4l); |
| 333 | + auto df_h_sig_4el = df_sig_4el_reco |
| 334 | + .Define("weight", [&]() { return luminosity * xsec_SMHiggsToZZTo4L / nevt_SMHiggsToZZTo4L; }, {}) |
| 335 | + .Histo1D({"h_sig_4el", "", nbins, 70, 180}, "H_mass", "weight"); |
| 336 | + |
| 337 | + const auto xsec_ZZTo4el = xsec_ZZTo4mu; // ZZ->4el: Standard Model cross-section |
| 338 | + const auto nevt_ZZTo4el = 1499093.0; // ZZ->4el: Number of simulated events |
| 339 | + auto df_bkg_4el_reco = reco_higgs_to_4el(df_bkg_4el); |
| 340 | + auto df_h_bkg_4el = df_bkg_4el_reco |
| 341 | + .Define("weight", [&]() { return luminosity * xsec_ZZTo4el * scale_ZZTo4l / nevt_ZZTo4el; }, {}) |
| 342 | + .Histo1D({"h_bkg_4el", "", nbins, 70, 180}, "H_mass", "weight"); |
| 343 | + |
| 344 | + auto df_data_4el_reco = reco_higgs_to_4el(df_data_doubleel); |
| 345 | + auto df_h_data_4el = df_data_4el_reco.Define("weight", []() { return 1.0; }, {}) |
| 346 | + .Histo1D({"h_data_4el", "", nbins, 70, 180}, "H_mass", "weight"); |
| 347 | + |
| 348 | + // Produce histograms for different channels and make plots |
| 349 | + plot(df_h_sig_4mu, df_h_bkg_4mu, df_h_data_4mu, "m_{4#mu} (GeV)", "higgs_4mu.pdf"); |
| 350 | + plot(df_h_sig_4el, df_h_bkg_4el, df_h_data_4el, "m_{4e} (GeV)", "higgs_4el.pdf"); |
| 351 | +} |
| 352 | + |
| 353 | +static void df103_NanoAODHiggsAnalysis_noimt(benchmark::State &state) |
| 354 | +{ |
| 355 | + for (auto _ : state) |
| 356 | + payload(0); |
| 357 | +} |
| 358 | + |
| 359 | +static void df103_NanoAODHiggsAnalysis_imt(benchmark::State &state) |
| 360 | +{ |
| 361 | + for (auto _ : state) { |
| 362 | + const auto nthreads = state.range(0); |
| 363 | + payload(nthreads); |
| 364 | + } |
| 365 | +} |
| 366 | + |
| 367 | +BENCHMARK(df103_NanoAODHiggsAnalysis_noimt)->Repetitions(1); |
| 368 | +BENCHMARK(df103_NanoAODHiggsAnalysis_imt)->Repetitions(1)->Arg(8); |
| 369 | +BENCHMARK_MAIN(); |
0 commit comments