-
Couldn't load subscription status.
- Fork 1.4k
[hist] Implement initial RHistAutoAxisFiller
#20180
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,192 @@ | ||
| /// \file | ||
| /// \warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes. | ||
| /// Feedback is welcome! | ||
|
|
||
| #ifndef ROOT_RHistAutoAxisFiller | ||
| #define ROOT_RHistAutoAxisFiller | ||
|
|
||
| #include "RHist.hxx" | ||
| #include "RHistEngine.hxx" | ||
| #include "RWeight.hxx" | ||
|
|
||
| #include <algorithm> | ||
| #include <cassert> | ||
| #include <cmath> | ||
| #include <cstddef> | ||
| #include <limits> | ||
| #include <optional> | ||
| #include <stdexcept> | ||
| #include <type_traits> | ||
| #include <utility> | ||
| #include <vector> | ||
|
|
||
| namespace ROOT { | ||
| namespace Experimental { | ||
|
|
||
| /** | ||
| A histogram filler that automatically determines the axis interval. | ||
|
|
||
| This class allows filling a regular one-dimensional histogram without specifying an axis interval during construction. | ||
| After a configurable number of buffered entries, or upon request, a RRegularAxis is constructed using the minimum and | ||
| maximum values until that point. This ensures all initial entries are filled into normal bins. Note that this cannot be | ||
| guaranteed for further calls to Fill. | ||
|
|
||
| \code | ||
| ROOT::Experimental::RHistAutoAxisFiller<int> filler(20); | ||
| filler.Fill(1.0); | ||
| filler.Fill(1.5); | ||
| filler.Fill(2.0); | ||
|
|
||
| // The following will implicitly trigger the histogram creation | ||
| auto &hist = filler.GetHist(); | ||
| // hist.GetNEntries() will return 3 | ||
| \endcode | ||
|
|
||
| \warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes. | ||
| Feedback is welcome! | ||
| */ | ||
| template <typename BinContentType> | ||
| class RHistAutoAxisFiller final { | ||
| public: | ||
| static constexpr bool SupportsWeightedFilling = RHistEngine<BinContentType>::SupportsWeightedFilling; | ||
|
|
||
| private: | ||
| /// The filled histogram, after it has been constructed | ||
| std::optional<RHist<BinContentType>> fHist; | ||
|
|
||
| /// The number of normal bins | ||
| std::size_t fNNormalBins; | ||
| /// The maximum buffer size until Flush() is automatically called | ||
| std::size_t fMaxBufferSize; | ||
|
|
||
| using BufferElement = std::conditional_t<SupportsWeightedFilling, std::pair<double, RWeight>, double>; | ||
|
|
||
| /// The buffer of filled entries | ||
| std::vector<BufferElement> fBuffer; | ||
| /// The minimum of the filled entries | ||
| double fMinimum = std::numeric_limits<double>::infinity(); | ||
| /// The maximum of the filled entries | ||
| double fMaximum = -std::numeric_limits<double>::infinity(); | ||
|
|
||
| public: | ||
| /// Construct a filler that buffers entries until the axis interval can be determined. | ||
| /// | ||
| /// \param[in] nNormalBins the number of normal bins of the histogram to create, must be > 0 | ||
| /// \param[in] maxBufferSize the number of buffered entries at which Flush() is called automatically, must be > 0 | ||
| /// \throws std::invalid_argument if one of the arguments is 0 | ||
| explicit RHistAutoAxisFiller(std::size_t nNormalBins, std::size_t maxBufferSize = 1024) | ||
| : fNNormalBins(nNormalBins), fMaxBufferSize(maxBufferSize) | ||
| { | ||
| // Both sizes are unsigned, so "not positive" can only mean zero. | ||
| if (fNNormalBins < 1) { | ||
| throw std::invalid_argument("nNormalBins must be > 0"); | ||
| } | ||
| if (fMaxBufferSize < 1) { | ||
| throw std::invalid_argument("maxBufferSize must be > 0"); | ||
| } | ||
| } | ||
|
|
||
| /// \return the number of normal bins passed to the constructor | ||
| std::size_t GetNNormalBins() const | ||
| { | ||
| return fNNormalBins; | ||
| } | ||
| /// \return the maximum buffer size passed to the constructor | ||
| std::size_t GetMaxBufferSize() const | ||
| { | ||
| return fMaxBufferSize; | ||
| } | ||
|
|
||
| private: | ||
| /// Append one entry to the buffer and update the running minimum and maximum. | ||
| /// | ||
| /// Calls Flush() once the buffer holds at least fMaxBufferSize entries. | ||
| /// | ||
| /// \param[in] x the argument | ||
| /// \param[in] weight the weight for this entry; asserted to be 1.0 if weighted filling is not supported | ||
| void BufferImpl(double x, RWeight weight) | ||
| { | ||
| if constexpr (!SupportsWeightedFilling) { | ||
| // Only the argument is stored; the weight is checked and otherwise unused. | ||
| assert(weight.fValue == 1.0); | ||
| static_cast<void>(weight); | ||
| fBuffer.push_back(x); | ||
| } else { | ||
| fBuffer.emplace_back(x, weight); | ||
| } | ||
|
|
||
| // Comparisons involving NaN are false, so a NaN argument leaves both limits untouched. | ||
| if (x < fMinimum) { | ||
| fMinimum = x; | ||
| } | ||
| if (fMaximum < x) { | ||
| fMaximum = x; | ||
| } | ||
|
|
||
| const bool bufferIsFull = fBuffer.size() >= fMaxBufferSize; | ||
| if (bufferIsFull) { | ||
| Flush(); | ||
| } | ||
| } | ||
|
|
||
| public: | ||
| /// Fill an entry into the histogram. | ||
| /// | ||
| /// Until the histogram has been constructed, the entry is buffered with a weight of 1.0; this may call Flush() when | ||
| /// the buffer is full. Afterwards the call is forwarded directly to the histogram. | ||
| /// | ||
| /// \param[in] x the argument | ||
| /// \par See also | ||
| /// the \ref Fill(double x, RWeight weight) "overload for weighted filling" | ||
| void Fill(double x) | ||
| { | ||
| if (!fHist) { | ||
| // No histogram yet: keep the entry until the axis interval is known. | ||
| BufferImpl(x, RWeight(1.0)); | ||
| } else { | ||
| fHist->Fill(x); | ||
| } | ||
| } | ||
|
|
||
| /// Fill an entry into the histogram with a weight. | ||
| /// | ||
| /// This overload is only available for floating-point bin content types (see | ||
| /// \ref RHistEngine::SupportsWeightedFilling); using it with any other bin content type is rejected at compile time. | ||
| /// | ||
| /// Until the histogram has been constructed, the entry is buffered together with its weight; this may call Flush() | ||
| /// when the buffer is full. Afterwards the call is forwarded directly to the histogram. | ||
| /// | ||
| /// \param[in] x the argument | ||
| /// \param[in] weight the weight for this entry | ||
| /// \par See also | ||
| /// the \ref Fill(double x) "overload for unweighted filling" | ||
| void Fill(double x, RWeight weight) | ||
| { | ||
| // Enforce the documented restriction here, so that misuse is reported against this class. | ||
| static_assert(SupportsWeightedFilling, "weighted filling is only supported for floating-point bin content types"); | ||
|
|
||
| // If the histogram exists, forward the Fill call. | ||
| if (fHist) { | ||
| fHist->Fill(x, weight); | ||
| return; | ||
| } | ||
| BufferImpl(x, weight); | ||
| } | ||
|
|
||
| /// Flush the buffer of entries and construct the histogram. | ||
| /// | ||
| /// Does nothing if the histogram has already been constructed. Otherwise the histogram is created with an axis | ||
| /// interval from the minimum to just above the maximum of the buffered entries, all buffered entries are filled | ||
| /// into it, and the memory held by the buffer is released. | ||
| /// | ||
| /// \throws std::runtime_error if the buffer is empty, if the axis interval cannot be determined (the minimum, the | ||
| /// maximum, or the upper axis limit is not finite), or if the interval would be empty because the minimum equals the | ||
| /// maximum | ||
| void Flush() | ||
| { | ||
| if (fHist) { | ||
| assert(fBuffer.empty() && "buffer should have been emptied"); | ||
| return; | ||
| } | ||
|
|
||
| if (fBuffer.empty()) { | ||
| throw std::runtime_error("buffer is empty, cannot create histogram"); | ||
| } | ||
| // The limits keep their infinite initial values if only NaN was filled, and become infinite if infinities were. | ||
| if (!std::isfinite(fMinimum) || !std::isfinite(fMaximum)) { | ||
| throw std::runtime_error("could not determine axis interval"); | ||
| } | ||
| if (fMinimum == fMaximum) { | ||
| throw std::runtime_error("axis interval is empty"); | ||
| } | ||
|
|
||
| // Slightly increase the upper limit to make sure the maximum is included in the last bin. | ||
| const double high = std::nextafter(fMaximum, std::numeric_limits<double>::infinity()); | ||
| assert(high > fMaximum); | ||
| // If the maximum is the largest finite double, the next representable value is infinity, which is not usable as | ||
| // an axis limit. | ||
| if (!std::isfinite(high)) { | ||
| throw std::runtime_error("could not determine axis interval"); | ||
| } | ||
| fHist.emplace(fNNormalBins, std::make_pair(fMinimum, high)); | ||
|
|
||
| for (const auto &entry : fBuffer) { | ||
| if constexpr (SupportsWeightedFilling) { | ||
| fHist->Fill(entry.first, entry.second); | ||
| } else { | ||
| fHist->Fill(entry); | ||
| } | ||
| } | ||
| // From now on Fill forwards to the histogram, so the buffer is never used again: release its memory as well. | ||
| fBuffer.clear(); | ||
| fBuffer.shrink_to_fit(); | ||
| } | ||
|
|
||
| /// Return the constructed histogram, calling Flush() first so that all buffered entries are included. | ||
| /// Because of that, this throws whenever Flush() throws, for example if no entry has been filled yet. | ||
| /// \see Flush() | ||
| RHist<BinContentType> &GetHist() | ||
| { | ||
| Flush(); | ||

There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That means that we can't get an empty histogram from the auto filler. Is this the desired behavior? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes correct. It's a good question what we want to happen in this case. If we want to allow an empty histogram, what would be the axis interval and what would we want to happen in case of further fills? Would we modify the constructed histogram (potentially) behind the users' back? If there are many fills, how would the user tell is that now is the time to flush the buffer, while they are already looking at the histogram? I think my proposal for the moment would be to disallow and see if it poses problems that we find a good compromise how to address. |
||
| assert(fHist.has_value()); | ||
| return *fHist; | ||
| } | ||
| }; | ||
|
|
||
| } // namespace Experimental | ||
| } // namespace ROOT | ||
|
|
||
| #endif | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,162 @@ | ||
| #include "hist_test.hxx" | ||
|
|
||
| #include <limits> | ||
| #include <stdexcept> | ||
| #include <utility> | ||
|
|
||
| // Checks that the constructor stores its arguments and rejects zero for either of them. | ||
| TEST(RHistAutoAxisFiller, Constructor) | ||
| { | ||
| static constexpr std::size_t Bins = 20; | ||
| // Typed as std::size_t to match the getter's return type and avoid a signed/unsigned comparison. | ||
| static constexpr std::size_t DefaultMaxBufferSize = 1024; | ||
| RHistAutoAxisFiller<int> filler(Bins); | ||
| EXPECT_EQ(filler.GetNNormalBins(), Bins); | ||
| EXPECT_EQ(filler.GetMaxBufferSize(), DefaultMaxBufferSize); | ||
|
|
||
| // An explicitly requested maximum buffer size must be stored as well. | ||
| static constexpr std::size_t CustomMaxBufferSize = 16; | ||
| RHistAutoAxisFiller<int> customFiller(Bins, CustomMaxBufferSize); | ||
| EXPECT_EQ(customFiller.GetNNormalBins(), Bins); | ||
| EXPECT_EQ(customFiller.GetMaxBufferSize(), CustomMaxBufferSize); | ||
|
|
||
| EXPECT_THROW(RHistAutoAxisFiller<int>(0), std::invalid_argument); | ||
| EXPECT_THROW(RHistAutoAxisFiller<int>(1, 0), std::invalid_argument); | ||
| } | ||
|
|
||
| // Checks the axis and bin contents after buffered fills, and that later fills go straight to the histogram. | ||
| TEST(RHistAutoAxisFiller, Fill) | ||
| { | ||
| static constexpr std::size_t Bins = 20; | ||
| RHistAutoAxisFiller<int> filler(Bins); | ||
|
|
||
| // Puts exactly one entry at each of the values 0, 1, ..., Bins - 1. | ||
| const auto fillOncePerValue = [&filler]() { | ||
| for (std::size_t value = 0; value != Bins; ++value) { | ||
| filler.Fill(static_cast<double>(value)); | ||
| } | ||
| }; | ||
|
|
||
| // First round: these entries are buffered. | ||
| fillOncePerValue(); | ||
|
|
||
| // NaN should be ignored for the axis interval | ||
| filler.Fill(std::numeric_limits<double>::quiet_NaN()); | ||
|
|
||
| // Requesting the histogram flushes the buffer first. | ||
| auto &hist = filler.GetHist(); | ||
| auto &axis = std::get<RRegularAxis>(hist.GetAxes()[0]); | ||
| EXPECT_EQ(axis.GetNNormalBins(), Bins); | ||
| EXPECT_TRUE(axis.HasFlowBins()); | ||
| EXPECT_DOUBLE_EQ(axis.GetLow(), 0); | ||
| EXPECT_DOUBLE_EQ(axis.GetHigh(), Bins - 1); | ||
|
|
||
| // Expects the same content in each normal bin. | ||
| const auto expectInEveryNormalBin = [&hist, &axis](int expected) { | ||
| for (auto index : axis.GetNormalRange()) { | ||
| EXPECT_EQ(hist.GetBinContent(index), expected); | ||
| } | ||
| }; | ||
|
|
||
| EXPECT_EQ(hist.GetNEntries(), Bins + 1); | ||
| EXPECT_EQ(hist.GetBinContent(RBinIndex::Underflow()), 0); | ||
| expectInEveryNormalBin(1); | ||
| // The NaN entry | ||
| EXPECT_EQ(hist.GetBinContent(RBinIndex::Overflow()), 1); | ||
|
|
||
| // Second round: the histogram exists now, so these entries are forwarded to it directly. | ||
| fillOncePerValue(); | ||
| expectInEveryNormalBin(2); | ||
| } | ||
|
|
||
| // Checks that reaching the maximum buffer size constructs the histogram, so that later entries can hit flow bins. | ||
| TEST(RHistAutoAxisFiller, FillAutoFlush) | ||
| { | ||
| static constexpr std::size_t Bins = 20; | ||
| RHistAutoAxisFiller<int> filler(Bins); | ||
| // Query the limit instead of repeating the constructor's default value here. | ||
| const std::size_t maxBufferSize = filler.GetMaxBufferSize(); | ||
|
|
||
| // Fill entries so that it triggers auto-flushing | ||
| for (std::size_t i = 0; i < maxBufferSize; i++) { | ||
| filler.Fill(i); | ||
| } | ||
|
|
||
| // Further entries may land in the flow bins: the axis only covers the values 0 to maxBufferSize - 1. | ||
| filler.Fill(-1); | ||
| filler.Fill(2.0 * maxBufferSize); | ||
|
|
||
| auto &hist = filler.GetHist(); | ||
| // All buffered entries plus the two entries filled afterwards | ||
| EXPECT_EQ(hist.GetNEntries(), maxBufferSize + 2); | ||
| EXPECT_EQ(hist.GetBinContent(RBinIndex::Underflow()), 1); | ||
| EXPECT_EQ(hist.GetBinContent(RBinIndex::Overflow()), 1); | ||
| } | ||
|
|
||
| // Checks that both entries end up in normal bins when the maximum of the buffered entries is exactly 0. | ||
| TEST(RHistAutoAxisFiller, FillMax0) | ||
| { | ||
| static constexpr std::size_t Bins = 20; | ||
| RHistAutoAxisFiller<int> autoFiller(Bins); | ||
|
|
||
| autoFiller.Fill(-1); | ||
| autoFiller.Fill(0); | ||
|
|
||
| auto &filled = autoFiller.GetHist(); | ||
| // Neither of the two entries may have been counted in a flow bin. | ||
| const auto underflowContent = filled.GetBinContent(RBinIndex::Underflow()); | ||
| const auto overflowContent = filled.GetBinContent(RBinIndex::Overflow()); | ||
| EXPECT_EQ(underflowContent, 0); | ||
| EXPECT_EQ(overflowContent, 0); | ||
| } | ||
|
|
||
hahnjo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // Checks unweighted filling with a floating-point bin content type, before and after the flush. | ||
| TEST(RHistAutoAxisFiller, FillFloat) | ||
| { | ||
| RHistAutoAxisFiller<float> autoFiller(3); | ||
|
|
||
| // Buffered entries: the minimum and the maximum of the future axis interval. | ||
| autoFiller.Fill(1); | ||
| autoFiller.Fill(2); | ||
|
|
||
| auto &filled = autoFiller.GetHist(); | ||
| EXPECT_EQ(filled.GetBinContent(0), 1); | ||
| EXPECT_EQ(filled.GetBinContent(2), 1); | ||
|
|
||
| // This entry is filled after the histogram has been constructed. | ||
| autoFiller.Fill(1.5); | ||
| EXPECT_EQ(filled.GetBinContent(1), 1); | ||
| } | ||
|
|
||
| // Checks weighted filling with a floating-point bin content type, before and after the flush. | ||
| TEST(RHistAutoAxisFiller, FillWeight) | ||
| { | ||
| RHistAutoAxisFiller<float> autoFiller(3); | ||
|
|
||
| // Buffered entries: the minimum and the maximum of the future axis interval, each with its own weight. | ||
| autoFiller.Fill(1, RWeight(0.8)); | ||
| autoFiller.Fill(2, RWeight(0.9)); | ||
|
|
||
| auto &filled = autoFiller.GetHist(); | ||
| EXPECT_FLOAT_EQ(filled.GetBinContent(0), 0.8); | ||
| EXPECT_FLOAT_EQ(filled.GetBinContent(2), 0.9); | ||
|
|
||
| // This entry is filled after the histogram has been constructed. | ||
| autoFiller.Fill(1.5, RWeight(0.85)); | ||
| EXPECT_FLOAT_EQ(filled.GetBinContent(1), 0.85); | ||
| } | ||
|
|
||
| // Checks every situation in which Flush() cannot construct a histogram and throws instead. | ||
| TEST(RHistAutoAxisFiller, FlushError) | ||
| { | ||
| static constexpr std::size_t Bins = 20; | ||
| const double nan = std::numeric_limits<double>::quiet_NaN(); | ||
| const double inf = std::numeric_limits<double>::infinity(); | ||
|
|
||
| // Flush without entries | ||
| { | ||
| RHistAutoAxisFiller<int> emptyFiller(Bins); | ||
| EXPECT_THROW(emptyFiller.Flush(), std::runtime_error); | ||
| } | ||
|
|
||
| // NaN should be ignored for the axis interval | ||
| { | ||
| RHistAutoAxisFiller<int> nanFiller(Bins); | ||
| nanFiller.Fill(nan); | ||
| EXPECT_THROW(nanFiller.Flush(), std::runtime_error); | ||
| } | ||
|
|
||
| // Fill with infinities | ||
| { | ||
| RHistAutoAxisFiller<int> infFiller(Bins); | ||
| infFiller.Fill(inf); | ||
| infFiller.Fill(-inf); | ||
| EXPECT_THROW(infFiller.Flush(), std::runtime_error); | ||
| } | ||
|
|
||
| // Fill with identical values | ||
| { | ||
| RHistAutoAxisFiller<int> constantFiller(Bins); | ||
| constantFiller.Fill(1); | ||
| constantFiller.Fill(1); | ||
| EXPECT_THROW(constantFiller.Flush(), std::runtime_error); | ||
| } | ||
| } | ||
|
|
||
| // Checks that the constructed histogram can be moved out of the filler and keeps its entries. | ||
| TEST(RHistAutoAxisFiller, GetHist) | ||
| { | ||
| static constexpr std::size_t Bins = 20; | ||
| RHistAutoAxisFiller<int> filler(Bins); | ||
|
|
||
| filler.Fill(0); | ||
| filler.Fill(1); | ||
|
|
||
| // The histogram can be moved out of the filler that constructed it. | ||
| RHist<int> hist(std::move(filler.GetHist())); | ||
| // Both buffered entries must have arrived in the moved-to histogram. | ||
| EXPECT_EQ(hist.GetNEntries(), 2u); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we want/need to expose this publicly?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Another good question. A potential use case would be where the user designates a limited set of "sample" entries that is potentially not aligned with
`fMaxBufferSize`, or multiple `RHistAutoAxisFiller` objects with different fill frequencies that the user wants to "synchronize".