diff --git a/hist/histv7/doc/CodeArchitecture.md b/hist/histv7/doc/CodeArchitecture.md index 84ee08cde44dd..1c21078d4760b 100644 --- a/hist/histv7/doc/CodeArchitecture.md +++ b/hist/histv7/doc/CodeArchitecture.md @@ -74,3 +74,9 @@ Objects of this type are passed by value; most notably to `GetBinContent` and `S A range of `RBinIndex` from `begin` (inclusive) to `end` (exclusive). The class exposes an iterator interface that can be used in range-based loops. + +### `RHistAutoAxisFiller` + +A specialized class to automatically determine the axis interval during filling. +It constructs a regular axis based on the minimum and maximum values of the initial entries. +The implementation is currently restricted to one dimension and sequential filling. diff --git a/hist/histv7/headers.cmake b/hist/histv7/headers.cmake index 91a19290b509f..2ef345b3ac53c 100644 --- a/hist/histv7/headers.cmake +++ b/hist/histv7/headers.cmake @@ -5,6 +5,7 @@ set(histv7_headers ROOT/RBinWithError.hxx ROOT/RCategoricalAxis.hxx ROOT/RHist.hxx + ROOT/RHistAutoAxisFiller.hxx ROOT/RHistEngine.hxx ROOT/RHistStats.hxx ROOT/RHistUtils.hxx diff --git a/hist/histv7/inc/ROOT/RHistAutoAxisFiller.hxx b/hist/histv7/inc/ROOT/RHistAutoAxisFiller.hxx new file mode 100644 index 0000000000000..4db8c816666ad --- /dev/null +++ b/hist/histv7/inc/ROOT/RHistAutoAxisFiller.hxx @@ -0,0 +1,192 @@ +/// \file +/// \warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes. +/// Feedback is welcome! + +#ifndef ROOT_RHistAutoAxisFiller +#define ROOT_RHistAutoAxisFiller + +#include "RHist.hxx" +#include "RHistEngine.hxx" +#include "RWeight.hxx" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ROOT { +namespace Experimental { + +/** +A histogram filler that automatically determines the axis interval. + +This class allows filling a regular one-dimensional histogram without specifying an axis interval during construction. 
+After a configurable number of buffered entries, or upon request, a RRegularAxis is constructed using the minimum and +maximum values until that point. This ensures all initial entries (except NaN) are filled into normal bins. Note that +this cannot be guaranteed for further calls to Fill, whose entries may land in the underflow or overflow bins. + +\code +ROOT::Experimental::RHistAutoAxisFiller filler(20); +filler.Fill(1.0); +filler.Fill(1.5); +filler.Fill(2.0); + +// The following will implicitly trigger the histogram creation +auto &hist = filler.GetHist(); +// hist.GetNEntries() will return 3 +\endcode + +\warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes. +Feedback is welcome! +*/ +template +class RHistAutoAxisFiller final { +public: + static constexpr bool SupportsWeightedFilling = RHistEngine::SupportsWeightedFilling; + +private: + /// The filled histogram; empty until Flush() has constructed it + std::optional> fHist; + + /// The number of normal bins + std::size_t fNNormalBins; + /// The number of buffered entries at which Flush() is automatically called + std::size_t fMaxBufferSize; + + using BufferElement = std::conditional_t, double>; + + /// The buffer of entries filled before the histogram is constructed + std::vector fBuffer; + /// The minimum of the buffered entries, ignoring NaN + double fMinimum = std::numeric_limits::infinity(); + /// The maximum of the buffered entries, ignoring NaN + double fMaximum = -std::numeric_limits::infinity(); + +public: + /// Create a filler object. 
+ /// + /// \param[in] nNormalBins the number of normal bins, must be > 0, else std::invalid_argument is thrown + /// \param[in] maxBufferSize the buffer size that triggers Flush(), must be > 0, else std::invalid_argument is thrown + explicit RHistAutoAxisFiller(std::size_t nNormalBins, std::size_t maxBufferSize = 1024) + : fNNormalBins(nNormalBins), fMaxBufferSize(maxBufferSize) + { + if (nNormalBins == 0) { + throw std::invalid_argument("nNormalBins must be > 0"); + } + if (maxBufferSize == 0) { + throw std::invalid_argument("maxBufferSize must be > 0"); + } + } + + std::size_t GetNNormalBins() const { return fNNormalBins; } + std::size_t GetMaxBufferSize() const { return fMaxBufferSize; } + +private: + void BufferImpl(double x, RWeight weight) + { + if constexpr (SupportsWeightedFilling) { + fBuffer.emplace_back(x, weight); + } else { + assert(weight.fValue == 1.0); + // Silence compiler warning about unused parameter if the assert is compiled out + (void)weight; + fBuffer.push_back(x); + } + fMinimum = std::min(fMinimum, x); + fMaximum = std::max(fMaximum, x); + + if (fBuffer.size() >= fMaxBufferSize) { + Flush(); + } + } + +public: + /// Fill an entry into the histogram, or buffer it (which may trigger Flush()) if the histogram does not exist yet. + /// + /// \param[in] x the argument + /// \par See also + /// the \ref Fill(double x, RWeight weight) "overload for weighted filling" + void Fill(double x) + { + // If the histogram exists, forward the Fill call and bypass the buffer. + if (fHist) { + fHist->Fill(x); + return; + } + BufferImpl(x, RWeight(1.0)); + } + + /// Fill an entry with a weight into the histogram, or buffer it if the histogram does not exist yet. + /// + /// This overload is only available for floating-point bin content types (see + /// \ref RHistEngine::SupportsWeightedFilling). + /// + /// \param[in] x the argument + /// \param[in] weight the weight for this entry + /// \par See also + /// the \ref Fill(double x) "overload for unweighted filling" + void Fill(double x, RWeight weight) + { + // If the histogram exists, forward the Fill call and bypass the buffer. + if (fHist) { + fHist->Fill(x, weight); + return; + } + BufferImpl(x, weight); + } + + /// Flush the buffer of entries and construct the histogram. 
+ /// + /// Does nothing if the histogram already exists. Otherwise throws std::runtime_error if the buffer is empty, if no + /// finite axis interval can be determined, or if it would be empty because the minimum equals the maximum. + void Flush() + { + if (fHist) { + assert(fBuffer.empty() && "buffer should have been emptied"); + return; + } + + if (fBuffer.empty()) { + throw std::runtime_error("buffer is empty, cannot create histogram"); + } + if (!std::isfinite(fMinimum) || !std::isfinite(fMaximum)) { + throw std::runtime_error("could not determine axis interval"); + } + if (fMinimum == fMaximum) { + throw std::runtime_error("axis interval is empty"); + } + + // The axis interval excludes its upper limit: slightly increase it so that the maximum falls into the last bin. + double high = std::nextafter(fMaximum, std::numeric_limits::infinity()); + assert(high > fMaximum); + fHist.emplace(fNNormalBins, std::make_pair(fMinimum, high)); + + for (auto &&x : fBuffer) { + if constexpr (SupportsWeightedFilling) { + fHist->Fill(x.first, x.second); + } else { + fHist->Fill(x); + } + } + fBuffer.clear(); + } + + /// Return the constructed histogram, calling Flush() first to construct it if needed. + /// + /// \see Flush() + RHist &GetHist() + { + Flush(); + assert(fHist.has_value()); + return *fHist; + } +}; + +} // namespace Experimental +} // namespace ROOT + +#endif diff --git a/hist/histv7/inc/ROOT/RRegularAxis.hxx b/hist/histv7/inc/ROOT/RRegularAxis.hxx index e5961bc93bb7c..aa5f90234d1e9 100644 --- a/hist/histv7/inc/ROOT/RRegularAxis.hxx +++ b/hist/histv7/inc/ROOT/RRegularAxis.hxx @@ -25,7 +25,7 @@ A regular axis with equidistant bins in the interval \f$[fLow, fHigh)\f$. For example, the following creates a regular axis with 10 normal bins between 5 and 15: \code -ROOT::Experimental::RRegularAxis axis(10, 5, 15); +ROOT::Experimental::RRegularAxis axis(10, {5, 15}); \endcode It is possible to disable underflow and overflow bins by passing `enableFlowBins = false`. 
In that case, arguments @@ -101,6 +101,9 @@ public: } std::size_t bin = (x - fLow) * fInvBinWidth; + if (bin >= fNNormalBins) { + bin = fNNormalBins - 1; + } return {bin, true}; } diff --git a/hist/histv7/test/CMakeLists.txt b/hist/histv7/test/CMakeLists.txt index 36055aba99cc7..4cc92034d7832 100644 --- a/hist/histv7/test/CMakeLists.txt +++ b/hist/histv7/test/CMakeLists.txt @@ -1,3 +1,4 @@ +HIST_ADD_GTEST(hist_auto hist_auto.cxx) HIST_ADD_GTEST(hist_axes hist_axes.cxx) HIST_ADD_GTEST(hist_categorical hist_categorical.cxx) HIST_ADD_GTEST(hist_engine hist_engine.cxx) diff --git a/hist/histv7/test/hist_auto.cxx b/hist/histv7/test/hist_auto.cxx new file mode 100644 index 0000000000000..8a3d900f51e75 --- /dev/null +++ b/hist/histv7/test/hist_auto.cxx @@ -0,0 +1,162 @@ +#include "hist_test.hxx" + +#include +#include +#include + +TEST(RHistAutoAxisFiller, Constructor) +{ + static constexpr std::size_t Bins = 20; + RHistAutoAxisFiller filler(Bins); + EXPECT_EQ(filler.GetNNormalBins(), Bins); + EXPECT_EQ(filler.GetMaxBufferSize(), 1024); + + EXPECT_THROW(RHistAutoAxisFiller(0), std::invalid_argument); + EXPECT_THROW(RHistAutoAxisFiller(1, 0), std::invalid_argument); +} + +TEST(RHistAutoAxisFiller, Fill) +{ + static constexpr std::size_t Bins = 20; + RHistAutoAxisFiller filler(Bins); + + // Fill the values 0, ..., Bins - 1, one entry each + for (std::size_t i = 0; i < Bins; i++) { + filler.Fill(i); + } + + // NaN should be ignored for the axis interval, but is still counted as an entry + filler.Fill(std::numeric_limits::quiet_NaN()); + + // Get the histogram, which first flushes the buffer and thereby fixes the axis interval + auto &hist = filler.GetHist(); + auto &axis = std::get(hist.GetAxes()[0]); + EXPECT_EQ(axis.GetNNormalBins(), Bins); + EXPECT_TRUE(axis.HasFlowBins()); + EXPECT_DOUBLE_EQ(axis.GetLow(), 0); + EXPECT_DOUBLE_EQ(axis.GetHigh(), Bins - 1); + + EXPECT_EQ(hist.GetNEntries(), Bins + 1); + EXPECT_EQ(hist.GetBinContent(RBinIndex::Underflow()), 0); + for (auto index : axis.GetNormalRange()) { + EXPECT_EQ(hist.GetBinContent(index), 1); + } + // The NaN entry ends up in the overflow bin + 
EXPECT_EQ(hist.GetBinContent(RBinIndex::Overflow()), 1); + + // Fill some more entries that are now directly forwarded to the histogram + for (std::size_t i = 0; i < Bins; i++) { + filler.Fill(i); + } + for (auto index : axis.GetNormalRange()) { + EXPECT_EQ(hist.GetBinContent(index), 2); + } +} + +TEST(RHistAutoAxisFiller, FillAutoFlush) +{ + static constexpr std::size_t Bins = 20; + RHistAutoAxisFiller filler(Bins); + + // Fill as many entries as the default maximum buffer size, which triggers auto-flushing + for (std::size_t i = 0; i < 1024; i++) { + filler.Fill(i); + } + + // Further entries outside of the interval determined at auto-flush land in the flow bins + filler.Fill(-1); + filler.Fill(2000); + + auto &hist = filler.GetHist(); + EXPECT_EQ(hist.GetBinContent(RBinIndex::Underflow()), 1); + EXPECT_EQ(hist.GetBinContent(RBinIndex::Overflow()), 1); +} + +TEST(RHistAutoAxisFiller, FillMax0) +{ + static constexpr std::size_t Bins = 20; + RHistAutoAxisFiller filler(Bins); + + filler.Fill(-1); + filler.Fill(0); + + auto &hist = filler.GetHist(); + EXPECT_EQ(hist.GetBinContent(RBinIndex::Underflow()), 0); + EXPECT_EQ(hist.GetBinContent(RBinIndex::Overflow()), 0); +} + +TEST(RHistAutoAxisFiller, FillFloat) +{ + RHistAutoAxisFiller filler(3); + + filler.Fill(1); + filler.Fill(2); + + auto &hist = filler.GetHist(); + EXPECT_EQ(hist.GetBinContent(0), 1); + EXPECT_EQ(hist.GetBinContent(2), 1); + + filler.Fill(1.5); + EXPECT_EQ(hist.GetBinContent(1), 1); +} + +TEST(RHistAutoAxisFiller, FillWeight) +{ + RHistAutoAxisFiller filler(3); + + filler.Fill(1, RWeight(0.8)); + filler.Fill(2, RWeight(0.9)); + + auto &hist = filler.GetHist(); + EXPECT_FLOAT_EQ(hist.GetBinContent(0), 0.8); + EXPECT_FLOAT_EQ(hist.GetBinContent(2), 0.9); + + filler.Fill(1.5, RWeight(0.85)); + EXPECT_FLOAT_EQ(hist.GetBinContent(1), 0.85); +} + +TEST(RHistAutoAxisFiller, FlushError) +{ + static constexpr std::size_t Bins = 20; + + { + RHistAutoAxisFiller filler(Bins); + // Flush without any buffered entries + EXPECT_THROW(filler.Flush(), std::runtime_error); + } + + { + RHistAutoAxisFiller 
filler(Bins); + // NaN is ignored for the axis interval, so no finite minimum or maximum is available + filler.Fill(std::numeric_limits::quiet_NaN()); + EXPECT_THROW(filler.Flush(), std::runtime_error); + } + + { + RHistAutoAxisFiller filler(Bins); + // Fill with infinities, which cannot serve as axis limits + filler.Fill(std::numeric_limits::infinity()); + filler.Fill(-std::numeric_limits::infinity()); + EXPECT_THROW(filler.Flush(), std::runtime_error); + } + + { + RHistAutoAxisFiller filler(Bins); + // Fill with identical values, which would give an empty axis interval + filler.Fill(1); + filler.Fill(1); + EXPECT_THROW(filler.Flush(), std::runtime_error); + } +} + +TEST(RHistAutoAxisFiller, GetHist) +{ + static constexpr std::size_t Bins = 20; + RHistAutoAxisFiller filler(Bins); + + filler.Fill(0); + filler.Fill(1); + + // The histogram can be moved out of the filler that constructed it. + RHist hist(std::move(filler.GetHist())); +} diff --git a/hist/histv7/test/hist_regular.cxx b/hist/histv7/test/hist_regular.cxx index e2c9c3233156a..434ee0c063b96 100644 --- a/hist/histv7/test/hist_regular.cxx +++ b/hist/histv7/test/hist_regular.cxx @@ -113,6 +113,16 @@ TEST(RRegularAxis, ComputeLinearizedIndex) } } +TEST(RRegularAxis, ComputeLinearizedIndexMin) +{ + static constexpr std::size_t Bins = 20; + const RRegularAxis axis(Bins, {-1, std::numeric_limits::min()}); + + auto linIndex = axis.ComputeLinearizedIndex(0); + EXPECT_EQ(linIndex.fIndex, Bins - 1); + EXPECT_TRUE(linIndex.fValid); +} + TEST(RRegularAxis, GetLinearizedIndex) { static constexpr std::size_t Bins = 20; diff --git a/hist/histv7/test/hist_test.hxx b/hist/histv7/test/hist_test.hxx index b9109388836f7..ba7983f0ff627 100644 --- a/hist/histv7/test/hist_test.hxx +++ b/hist/histv7/test/hist_test.hxx @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -21,6 +22,7 @@ using ROOT::Experimental::RBinIndexRange; using ROOT::Experimental::RBinWithError; using ROOT::Experimental::RCategoricalAxis; using ROOT::Experimental::RHist; +using ROOT::Experimental::RHistAutoAxisFiller; using 
ROOT::Experimental::RHistEngine; using ROOT::Experimental::RHistStats; using ROOT::Experimental::RRegularAxis;