Add RNTuple benchmark for ATLAS file

kamahori · kamahori · commit 83699a6cab4e · 2020-07-30T10:43:36.000Z
diff --git a/root/tree/tree/CMakeLists.txt b/root/tree/tree/CMakeLists.txt
@@ -15,6 +15,12 @@ if(ROOT_root7_FOUND)
             ROOTDataFrame ROOTVecOps ROOTNTuple
             DEPENDS libh1event
   )
+
+  RB_ADD_TOOL(gen_atlas ${CMAKE_CURRENT_SOURCE_DIR}/gen_atlas.cxx
+            LIBRARIES Core Hist Imt RIO Tree TreePlayer 
+            ROOTDataFrame ROOTVecOps ROOTNTuple
+            DEPENDS libh1event
+  )
 endif()
 
 if(ROOT_root7_FOUND AND rootbench-datafiles)
@@ -28,4 +34,15 @@ if(ROOT_root7_FOUND AND rootbench-datafiles)
     SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_h1 -o ${RB_DATASETDIR} -c lz4 ${RB_DATASETDIR}/h1dst-lz4.root"
     SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_h1 -o ${RB_DATASETDIR} -c zstd ${RB_DATASETDIR}/h1dst-zstd.root"
     SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_h1 -o ${RB_DATASETDIR} -c none ${RB_DATASETDIR}/h1dst-none.root")
+  
+  RB_ADD_GBENCHMARK(RNTupleATLASBenchmarks
+    RNTupleATLASBenchmarks.cxx
+    LABEL short
+    LIBRARIES Core Hist MathCore RIO Tree ROOTNTuple
+    DOWNLOAD_DATAFILES gg_data-zstd.root
+    SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_atlas -i ${RB_DATASETDIR}/gg_data-zstd.root -o ${RB_DATASETDIR} -c zlib"
+    SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_atlas -i ${RB_DATASETDIR}/gg_data-zstd.root -o ${RB_DATASETDIR} -c lzma"
+    SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_atlas -i ${RB_DATASETDIR}/gg_data-zstd.root -o ${RB_DATASETDIR} -c lz4"
+    SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_atlas -i ${RB_DATASETDIR}/gg_data-zstd.root -o ${RB_DATASETDIR} -c zstd"
+    SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_atlas -i ${RB_DATASETDIR}/gg_data-zstd.root -o ${RB_DATASETDIR} -c none")
 endif()
diff --git a/root/tree/tree/RNTupleATLASBenchmarks.cxx b/root/tree/tree/RNTupleATLASBenchmarks.cxx
@@ -0,0 +1,149 @@
+#include <benchmark/benchmark.h>
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cstdio>
+#include <iostream>
+#include <future>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <ROOT/RDataFrame.hxx>
+#include <ROOT/RNTuple.hxx>
+#include <ROOT/RNTupleDS.hxx>
+#include <ROOT/RNTupleOptions.hxx>
+#include <Compression.h>
+#include <TApplication.h>
+#include <TBranch.h>
+#include <TCanvas.h>
+#include <TClassTable.h>
+#include <TF1.h>
+#include <TFile.h>
+#include <TH1D.h>
+#include <TLatex.h>
+#include <TLegend.h>
+#include <TLorentzVector.h>
+#include <TMath.h>
+#include <TStyle.h>
+#include <TSystem.h>
+#include <TTree.h>
+#include <TTreeReader.h>
+#include <TTreePerfStats.h>
+
+#include <Math/Vector4D.h>
+
+#include <rootbench/RBConfig.h>
+
+
+static float ComputeInvariantMass(float pt0, float pt1, float eta0, float eta1, float phi0, float phi1, float e0, float e1)
+{
+    ROOT::Math::PtEtaPhiEVector p1(pt0, eta0, phi0, e0);
+    ROOT::Math::PtEtaPhiEVector p2(pt1, eta1, phi1, e1);
+    return (p1 + p2).mass() / 1000.0;
+}
+
+
+static void ProcessNTuple(ROOT::Experimental::RNTupleReader *ntuple, TH1D *hMass, bool isMC)
+{
+
+    auto viewTrigP           = ntuple->GetView<bool>("trigP");
+    auto viewPhotonN         = ntuple->GetView<std::uint32_t>("photon_n");
+    auto viewPhotonIsTightId = ntuple->GetView<std::vector<bool>>("photon_isTightID");
+    auto viewPhotonPt        = ntuple->GetView<std::vector<float>>("photon_pt");
+    auto viewPhotonEta       = ntuple->GetView<std::vector<float>>("photon_eta");
+    auto viewPhotonPhi       = ntuple->GetView<std::vector<float>>("photon_phi");
+    auto viewPhotonE         = ntuple->GetView<std::vector<float>>("photon_E");
+    auto viewPhotonPtCone30  = ntuple->GetView<std::vector<float>>("photon_ptcone30");
+    auto viewPhotonEtCone20  = ntuple->GetView<std::vector<float>>("photon_etcone20");
+
+    auto viewScaleFactorPhoton        = ntuple->GetView<float>("scaleFactor_PHOTON");
+    auto viewScaleFactorPhotonTrigger = ntuple->GetView<float>("scaleFactor_PhotonTRIGGER");
+    auto viewScaleFactorPileUp        = ntuple->GetView<float>("scaleFactor_PILEUP");
+    auto viewMcWeight                 = ntuple->GetView<float>("mcWeight");
+
+    unsigned nevents = 0;
+    for (auto e : ntuple->GetEntryRange()) {
+        nevents++;
+
+        if (!viewTrigP(e)) continue;
+
+        std::vector<size_t> idxGood;
+        auto isTightId = viewPhotonIsTightId(e);
+        auto pt = viewPhotonPt(e);
+        auto eta = viewPhotonEta(e);
+
+        for (size_t i = 0; i < viewPhotonN(e); ++i) {
+            if (!isTightId[i]) continue;
+            if (pt[i] <= 25000.) continue;
+            if (abs(eta[i]) >= 2.37) continue;
+            if (abs(eta[i]) >= 1.37 && abs(eta[i]) <= 1.52) continue;
+            idxGood.push_back(i);
+        }
+        if (idxGood.size() != 2) continue;
+
+        auto ptCone30 = viewPhotonPtCone30(e);
+        auto etCone20 = viewPhotonEtCone20(e);
+
+        bool isIsolatedPhotons = true;
+        for (int i = 0; i < 2; ++i) {
+            if ((ptCone30[idxGood[i]] / pt[idxGood[i]] >= 0.065) ||
+                (etCone20[idxGood[i]] / pt[idxGood[i]] >= 0.065))
+            {
+            isIsolatedPhotons = false;
+            break;
+            }
+        }
+        if (!isIsolatedPhotons) continue;
+
+        auto phi = viewPhotonPhi(e);
+        auto E = viewPhotonE(e);
+
+        float myy = ComputeInvariantMass(
+            pt[idxGood[0]], pt[idxGood[1]],
+            eta[idxGood[0]], eta[idxGood[1]],
+            phi[idxGood[0]], phi[idxGood[1]],
+            E[idxGood[0]], E[idxGood[1]]);
+
+        if (pt[idxGood[0]] / 1000. / myy <= 0.35) continue;
+        if (pt[idxGood[1]] / 1000. / myy <= 0.25) continue;
+        if (myy <= 105) continue;
+        if (myy >= 160) continue;
+
+        if (isMC) {
+            auto weight = viewScaleFactorPhoton(e) * viewScaleFactorPhotonTrigger(e) *
+                        viewScaleFactorPileUp(e) * viewMcWeight(e);
+            hMass->Fill(myy, weight);
+        } else {
+            hMass->Fill(myy);
+        }
+    }
+}
+
+
+static void BM_RNTuple_ATLAS(benchmark::State &state, const std::string &comprAlgorithm)
+{
+    using RNTupleReader = ROOT::Experimental::RNTupleReader;
+    std::string path = RB::GetDataDir() + "/atlas-" + comprAlgorithm + ".ntuple";
+
+    for (auto _ : state) {
+        auto hData = new TH1D("", "Diphoton invariant mass; m_{#gamma#gamma} [GeV];Events", 30, 105, 160);
+
+        auto ntuple = RNTupleReader::Open("mini", path);
+        ProcessNTuple(ntuple.get(), hData, false /* isMC */);
+
+        delete hData;
+    }
+}
+BENCHMARK_CAPTURE(BM_RNTuple_ATLAS, BM_RNTuple_ATLAS_LZ4, "lz4")->Unit(benchmark::kMicrosecond)->Iterations(5);
+BENCHMARK_CAPTURE(BM_RNTuple_ATLAS, BM_RNTuple_ATLAS_ZLIB, "zlib")->Unit(benchmark::kMicrosecond)->Iterations(5);
+BENCHMARK_CAPTURE(BM_RNTuple_ATLAS, BM_RNTuple_ATLAS_LZMA, "lzma")->Unit(benchmark::kMicrosecond)->Iterations(5);
+BENCHMARK_CAPTURE(BM_RNTuple_ATLAS, BM_RNTuple_ATLAS_ZSTD, "zstd")->Unit(benchmark::kMicrosecond)->Iterations(5);
+BENCHMARK_CAPTURE(BM_RNTuple_ATLAS, BM_RNTuple_ATLAS_None, "none")->Unit(benchmark::kMicrosecond)->Iterations(5);
+
+BENCHMARK_MAIN(); 
diff --git a/root/tree/tree/gen_atlas.cxx b/root/tree/tree/gen_atlas.cxx
@@ -0,0 +1,165 @@
+#include <ROOT/RField.hxx>
+#include <ROOT/RNTuple.hxx>
+#include <ROOT/RNTupleModel.hxx>
+#include <ROOT/RNTupleOptions.hxx>
+
+#include <TBranch.h>
+#include <TBranchElement.h>
+#include <TBranchSTL.h>
+#include <TCanvas.h>
+#include <TFile.h>
+#include <TH1F.h>
+#include <TLeaf.h>
+#include <TTree.h>
+
+#include <cassert>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <unistd.h>
+
+using RNTupleModel = ROOT::Experimental::RNTupleModel;
+using RFieldBase = ROOT::Experimental::Detail::RFieldBase;
+using RNTupleWriter = ROOT::Experimental::RNTupleWriter;
+using RNTupleWriteOptions = ROOT::Experimental::RNTupleWriteOptions;
+
+int GetCompressionSettings(std::string shorthand)
+{
+   if (shorthand == "zlib")
+      return 101;
+   if (shorthand == "lz4")
+      return 404;
+   if (shorthand == "lzma")
+      return 207;
+   if (shorthand == "zstd")
+      return 505;
+   if (shorthand == "none")
+      return 0;
+   abort();
+}
+
+
+static void SplitPath(const std::string &path, std::string *basename, std::string *suffix)
+{
+    size_t idx_dot = path.find_last_of(".");
+    if (idx_dot == std::string::npos) {
+        *basename = path;
+        suffix->clear();
+    } else {
+        *basename = path.substr(0, idx_dot);
+        *suffix = path.substr(idx_dot + 1);
+    }
+}
+
+
+std::string StripSuffix(const std::string &path) {
+    std::string basename;
+    std::string suffix;
+    SplitPath(path, &basename, &suffix);
+    return basename;
+}
+
+
+std::string GetFileName(const std::string &path) {
+    const std::string::size_type idx = path.find_last_of('/');
+    if (idx != std::string::npos)
+        return path.substr(idx+1);
+    else
+        return path;
+}
+
+
+void Usage(char *progname) {
+   std::cout << "Usage: " << progname << " -i <gg_*.root> -o <ntuple-path> -c <compression>" << std::endl;
+}
+
+
+int main(int argc, char **argv) {
+   std::string inputFile = "gg_data.root";
+   std::string outputPath = ".";
+   int compressionSettings = 0;
+   std::string compressionShorthand = "none";
+
+   int c;
+   while ((c = getopt(argc, argv, "hvi:o:c:")) != -1) {
+      switch (c) {
+      case 'h':
+      case 'v':
+         Usage(argv[0]);
+         return 0;
+      case 'i':
+         inputFile = optarg;
+         break;
+      case 'o':
+         outputPath = optarg;
+         break;
+      case 'c':
+         compressionSettings = GetCompressionSettings(optarg);
+         compressionShorthand = optarg;
+         break;
+      default:
+         fprintf(stderr, "Unknown option: -%c\n", c);
+         Usage(argv[0]);
+         return 1;
+      }
+   }
+   std::string outputFile = outputPath + "/" + "atlas" + "-" + compressionShorthand + ".ntuple";
+
+   std::unique_ptr<TFile> f(TFile::Open(inputFile.c_str()));
+   assert(f && ! f->IsZombie());
+
+   auto model = RNTupleModel::Create();
+
+   auto tree = f->Get<TTree>("mini");
+   for (auto b : TRangeDynCast<TBranch>(*tree->GetListOfBranches())) {
+      assert(b);
+
+      TLeaf *l = static_cast<TLeaf*>(b->GetListOfLeaves()->First());
+
+      auto field = RFieldBase::Create(l->GetName(), l->GetTypeName());
+
+      if (typeid(*b) == typeid(TBranchSTL) || typeid(*b) == typeid(TBranchElement)) {
+         if (field->GetType() == "std::vector<bool>") {
+            std::vector<bool> **v = new std::vector<bool> *();
+            tree->SetBranchAddress(b->GetName(), v);
+            model->GetDefaultEntry()->CaptureValue(field->CaptureValue(*v));
+            model->GetRootField()->Attach(std::unique_ptr<RFieldBase>(field));
+         } else if (field->GetType() == "std::vector<float>") {
+            std::vector<float> **v = new std::vector<float> *();
+            tree->SetBranchAddress(b->GetName(), v);
+            model->GetDefaultEntry()->CaptureValue(field->CaptureValue(*v));
+            model->GetRootField()->Attach(std::unique_ptr<RFieldBase>(field));
+         } else if (field->GetType() == "std::vector<std::int32_t>") {
+            std::vector<std::int32_t> **v = new std::vector<std::int32_t> *();
+            tree->SetBranchAddress(b->GetName(), v);
+            model->GetDefaultEntry()->CaptureValue(field->CaptureValue(*v));
+            model->GetRootField()->Attach(std::unique_ptr<RFieldBase>(field));
+         } else if (field->GetType() == "std::vector<std::uint32_t>") {
+            std::vector<std::uint32_t> **v = new std::vector<std::uint32_t> *();
+            tree->SetBranchAddress(b->GetName(), v);
+            model->GetDefaultEntry()->CaptureValue(field->CaptureValue(*v));
+            model->GetRootField()->Attach(std::unique_ptr<RFieldBase>(field));
+         } else {
+            assert(false);
+         }
+      } else {
+         model->AddField(std::unique_ptr<RFieldBase>(field));
+         void *fieldDataPtr = model->GetDefaultEntry()->GetValue(l->GetName()).GetRawPtr();
+         tree->SetBranchAddress(b->GetName(), fieldDataPtr);
+      }
+   }
+
+   RNTupleWriteOptions options;
+   options.SetCompression(compressionSettings);
+   auto ntuple = RNTupleWriter::Recreate(std::move(model), "mini", outputFile, options);
+
+   auto nEntries = tree->GetEntries();
+   for (decltype(nEntries) i = 0; i < nEntries; ++i) {
+      tree->GetEntry(i);
+      ntuple->Fill();
+   }
+
+   tree->ResetBranchAddresses();
+}