Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions hist/histv7/doc/CodeArchitecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,9 @@ Objects of this type are passed by value; most notably to `GetBinContent` and `S

A range of `RBinIndex` from `begin` (inclusive) to `end` (exclusive).
The class exposes an iterator interface that can be used in range-based loops.

### `RHistAutoAxisFiller`

A specialized class to automatically determine the axis interval during filling.
It constructs a regular axis based on the minimum and maximum values of the initial entries.
The implementation is currently restricted to one dimension and sequential filling.
1 change: 1 addition & 0 deletions hist/histv7/headers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ set(histv7_headers
ROOT/RBinWithError.hxx
ROOT/RCategoricalAxis.hxx
ROOT/RHist.hxx
ROOT/RHistAutoAxisFiller.hxx
ROOT/RHistEngine.hxx
ROOT/RHistStats.hxx
ROOT/RHistUtils.hxx
Expand Down
192 changes: 192 additions & 0 deletions hist/histv7/inc/ROOT/RHistAutoAxisFiller.hxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
/// \file
/// \warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
/// Feedback is welcome!

#ifndef ROOT_RHistAutoAxisFiller
#define ROOT_RHistAutoAxisFiller

#include "RHist.hxx"
#include "RHistEngine.hxx"
#include "RWeight.hxx"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <limits>
#include <optional>
#include <stdexcept>
#include <type_traits>
#include <utility>
#include <vector>

namespace ROOT {
namespace Experimental {

/**
A histogram filler that automatically determines the axis interval.

This class allows filling a regular one-dimensional histogram without specifying an axis interval during construction.
After a configurable number of buffered entries, or upon request, an RRegularAxis is constructed from the minimum and
maximum values seen up to that point. This ensures all initial entries are filled into normal bins. Note that this
cannot be guaranteed for further calls to Fill, whose entries may end up in the underflow or overflow bins.

\code
ROOT::Experimental::RHistAutoAxisFiller<int> filler(20);
filler.Fill(1.0);
filler.Fill(1.5);
filler.Fill(2.0);

// The following will implicitly trigger the histogram creation
auto &hist = filler.GetHist();
// hist.GetNEntries() will return 3
\endcode

\warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
Feedback is welcome!
*/
template <typename BinContentType>
class RHistAutoAxisFiller final {
public:
static constexpr bool SupportsWeightedFilling = RHistEngine<BinContentType>::SupportsWeightedFilling;

private:
/// The filled histogram, after it has been constructed
std::optional<RHist<BinContentType>> fHist;

/// The number of normal bins
std::size_t fNNormalBins;
/// The maximum buffer size until Flush() is automatically called
std::size_t fMaxBufferSize;

using BufferElement = std::conditional_t<SupportsWeightedFilling, std::pair<double, RWeight>, double>;

/// The buffer of filled entries
std::vector<BufferElement> fBuffer;
/// The minimum of the filled entries
double fMinimum = std::numeric_limits<double>::infinity();
/// The maximum of the filled entries
double fMaximum = -std::numeric_limits<double>::infinity();

public:
/// Create a filler object.
///
/// \param[in] nNormalBins the number of normal bins, must be > 0
/// \param[in] maxBufferSize the maximum buffer size, must be > 0
explicit RHistAutoAxisFiller(std::size_t nNormalBins, std::size_t maxBufferSize = 1024)
: fNNormalBins(nNormalBins), fMaxBufferSize(maxBufferSize)
{
if (nNormalBins == 0) {
throw std::invalid_argument("nNormalBins must be > 0");
}
if (maxBufferSize == 0) {
throw std::invalid_argument("maxBufferSize must be > 0");
}
}

/// \return the number of normal bins passed to the constructor
std::size_t GetNNormalBins() const
{
   return fNNormalBins;
}
/// \return the maximum buffer size passed to the constructor
std::size_t GetMaxBufferSize() const
{
   return fMaxBufferSize;
}

private:
/// Append one entry to the buffer, update the running minimum and maximum, and flush once the buffer is full.
///
/// \param[in] x the argument
/// \param[in] weight the weight for this entry; expected to be 1.0 if weighted filling is not supported
void BufferImpl(double x, RWeight weight)
{
   if constexpr (!SupportsWeightedFilling) {
      assert(weight.fValue == 1.0);
      // The weight is not stored in this branch; silence the compiler warning about the unused parameter.
      (void)weight;
      fBuffer.push_back(x);
   } else {
      fBuffer.emplace_back(x, weight);
   }

   // Comparisons involving NaN are always false, so a NaN entry never changes the limits.
   if (x < fMinimum) {
      fMinimum = x;
   }
   if (fMaximum < x) {
      fMaximum = x;
   }

   const bool bufferFull = fBuffer.size() >= fMaxBufferSize;
   if (bufferFull) {
      Flush();
   }
}

public:
/// Fill an unweighted entry into the histogram.
///
/// As long as the histogram has not been constructed, the entry is buffered with a weight of 1.0. Afterwards, the
/// call is forwarded directly to the histogram.
///
/// \param[in] x the argument
/// \par See also
/// the \ref Fill(double x, RWeight weight) "overload for weighted filling"
void Fill(double x)
{
   if (!fHist) {
      // No histogram yet: keep the entry until the axis interval is determined.
      BufferImpl(x, RWeight(1.0));
      return;
   }
   fHist->Fill(x);
}

/// Fill an entry into the histogram with a weight.
///
/// This overload is only available for floating-point bin content types (see
/// \ref RHistEngine::SupportsWeightedFilling).
///
/// \param[in] x the argument
/// \param[in] weight the weight for this entry
/// \par See also
/// the \ref Fill(double x) "overload for unweighted filling"
void Fill(double x, RWeight weight)
{
// If the histogram exists, forward the Fill call.
if (fHist) {
fHist->Fill(x, weight);
return;
}
BufferImpl(x, weight);
}

/// Flush the buffer of entries and construct the histogram.
///
/// The histogram gets the configured number of normal bins on an interval that starts at the minimum of the buffered
/// entries and ends just above their maximum. All buffered entries are then filled into it and the buffer is cleared.
/// If the histogram has already been constructed, this function does nothing.
///
/// \throws std::runtime_error if the buffer is empty, if the axis interval cannot be determined because the minimum
/// or maximum is not finite, or if the interval would be empty because the minimum equals the maximum
void Flush()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want/need to expose this publicly?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another good question. A potential use case would be where the user designates a limited set of "sample" entries that are potentially not aligned with fMaxBufferSize, or multiple RHistAutoAxisFiller with different fill frequencies that the user wants to "synchronize".

{
// Nothing to do if the histogram was already constructed by an earlier flush.
if (fHist) {
assert(fBuffer.empty() && "buffer should have been emptied");
return;
}

if (fBuffer.empty()) {
throw std::runtime_error("buffer is empty, cannot create histogram");
}
// The limits keep their infinite initial values if only NaN was buffered, and become infinite if an infinity was.
if (!std::isfinite(fMinimum) || !std::isfinite(fMaximum)) {
throw std::runtime_error("could not determine axis interval");
}
if (fMinimum == fMaximum) {
throw std::runtime_error("axis interval is empty");
}

// Slightly increase the upper limit to make sure the maximum is included in the last bin.
double high = std::nextafter(fMaximum, std::numeric_limits<double>::infinity());
assert(high > fMaximum);
fHist.emplace(fNNormalBins, std::make_pair(fMinimum, high));

// Replay the buffered entries into the new histogram; the element type depends on whether weights are stored.
for (auto &&x : fBuffer) {
if constexpr (SupportsWeightedFilling) {
fHist->Fill(x.first, x.second);
} else {
fHist->Fill(x);
}
}
fBuffer.clear();
}

/// Return the constructed histogram.
///
/// This first calls Flush(), which constructs the histogram if that has not happened yet and may throw if that is not
/// possible (for example, if no entries have been filled).
///
/// \return a reference to the histogram held by this filler
/// \see Flush()
RHist<BinContentType> &GetHist()
{
Flush();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That means that we can't get an empty histogram from the auto filler. Is this the desired behavior?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes correct. It's a good question what we want to happen in this case. If we want to allow an empty histogram, what would be the axis interval and what would we want to happen in case of further fills? Would we modify the constructed histogram (potentially) behind the users' back? If there are many fills, how would the user tell is that now is the time to flush the buffer, while they are already looking at the histogram?

I think my proposal for the moment would be to disallow and see if it poses problems that we find a good compromise how to address.

// Flush() either constructed the histogram or threw an exception.
assert(fHist.has_value());
return *fHist;
}
};

} // namespace Experimental
} // namespace ROOT

#endif
5 changes: 4 additions & 1 deletion hist/histv7/inc/ROOT/RRegularAxis.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ A regular axis with equidistant bins in the interval \f$[fLow, fHigh)\f$.

For example, the following creates a regular axis with 10 normal bins between 5 and 15:
\code
ROOT::Experimental::RRegularAxis axis(10, 5, 15);
ROOT::Experimental::RRegularAxis axis(10, {5, 15});
\endcode

It is possible to disable underflow and overflow bins by passing `enableFlowBins = false`. In that case, arguments
Expand Down Expand Up @@ -101,6 +101,9 @@ public:
}

std::size_t bin = (x - fLow) * fInvBinWidth;
if (bin >= fNNormalBins) {
bin = fNNormalBins - 1;
}
return {bin, true};
}

Expand Down
1 change: 1 addition & 0 deletions hist/histv7/test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
HIST_ADD_GTEST(hist_auto hist_auto.cxx)
HIST_ADD_GTEST(hist_axes hist_axes.cxx)
HIST_ADD_GTEST(hist_categorical hist_categorical.cxx)
HIST_ADD_GTEST(hist_engine hist_engine.cxx)
Expand Down
162 changes: 162 additions & 0 deletions hist/histv7/test/hist_auto.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#include "hist_test.hxx"

#include <limits>
#include <stdexcept>
#include <utility>

TEST(RHistAutoAxisFiller, Constructor)
{
   // A valid filler reports the requested number of bins and the default buffer size.
   static constexpr std::size_t NBins = 20;
   RHistAutoAxisFiller<int> autoFiller(NBins);
   EXPECT_EQ(autoFiller.GetNNormalBins(), NBins);
   EXPECT_EQ(autoFiller.GetMaxBufferSize(), 1024);

   // Zero bins or a buffer size of zero must be rejected.
   EXPECT_THROW(RHistAutoAxisFiller<int>(0), std::invalid_argument);
   EXPECT_THROW(RHistAutoAxisFiller<int>(1, 0), std::invalid_argument);
}

TEST(RHistAutoAxisFiller, Fill)
{
   static constexpr std::size_t NBins = 20;
   RHistAutoAxisFiller<int> autoFiller(NBins);

   // Buffer one entry for each of the values 0, 1, ..., NBins - 1.
   for (std::size_t value = 0; value != NBins; ++value) {
      autoFiller.Fill(value);
   }

   // A NaN entry is buffered as well, but must not influence the axis interval.
   autoFiller.Fill(std::numeric_limits<double>::quiet_NaN());

   // Requesting the histogram flushes the buffer and constructs the axis.
   auto &histogram = autoFiller.GetHist();
   auto &regularAxis = std::get<RRegularAxis>(histogram.GetAxes()[0]);
   EXPECT_EQ(regularAxis.GetNNormalBins(), NBins);
   EXPECT_TRUE(regularAxis.HasFlowBins());
   EXPECT_DOUBLE_EQ(regularAxis.GetLow(), 0);
   EXPECT_DOUBLE_EQ(regularAxis.GetHigh(), NBins - 1);

   // All finite entries are in normal bins, one per bin ...
   EXPECT_EQ(histogram.GetNEntries(), NBins + 1);
   EXPECT_EQ(histogram.GetBinContent(RBinIndex::Underflow()), 0);
   for (auto binIndex : regularAxis.GetNormalRange()) {
      EXPECT_EQ(histogram.GetBinContent(binIndex), 1);
   }
   // ... and the NaN entry is in the overflow bin.
   EXPECT_EQ(histogram.GetBinContent(RBinIndex::Overflow()), 1);

   // Entries filled from now on are forwarded directly to the histogram.
   for (std::size_t value = 0; value != NBins; ++value) {
      autoFiller.Fill(value);
   }
   for (auto binIndex : regularAxis.GetNormalRange()) {
      EXPECT_EQ(histogram.GetBinContent(binIndex), 2);
   }
}

TEST(RHistAutoAxisFiller, FillAutoFlush)
{
   static constexpr std::size_t NBins = 20;
   RHistAutoAxisFiller<int> autoFiller(NBins);

   // Reach the default maximum buffer size, which triggers the automatic flush.
   for (std::size_t value = 0; value != 1024; ++value) {
      autoFiller.Fill(value);
   }

   // The axis interval is fixed now: values outside of it end up in the flow bins.
   autoFiller.Fill(-1);
   autoFiller.Fill(2000);

   auto &histogram = autoFiller.GetHist();
   EXPECT_EQ(histogram.GetBinContent(RBinIndex::Underflow()), 1);
   EXPECT_EQ(histogram.GetBinContent(RBinIndex::Overflow()), 1);
}

TEST(RHistAutoAxisFiller, FillMax0)
{
   static constexpr std::size_t NBins = 20;
   RHistAutoAxisFiller<int> autoFiller(NBins);

   // The maximum of the entries is exactly 0; both entries must still end up in normal bins.
   autoFiller.Fill(-1);
   autoFiller.Fill(0);

   auto &histogram = autoFiller.GetHist();
   EXPECT_EQ(histogram.GetBinContent(RBinIndex::Underflow()), 0);
   EXPECT_EQ(histogram.GetBinContent(RBinIndex::Overflow()), 0);
}

TEST(RHistAutoAxisFiller, FillFloat)
{
   RHistAutoAxisFiller<float> autoFiller(3);

   // The two buffered entries define the interval and land in the first and last bin.
   autoFiller.Fill(1);
   autoFiller.Fill(2);

   auto &histogram = autoFiller.GetHist();
   EXPECT_EQ(histogram.GetBinContent(0), 1);
   EXPECT_EQ(histogram.GetBinContent(2), 1);

   // An entry filled after the flush goes directly into the middle bin.
   autoFiller.Fill(1.5);
   EXPECT_EQ(histogram.GetBinContent(1), 1);
}

TEST(RHistAutoAxisFiller, FillWeight)
{
   RHistAutoAxisFiller<float> autoFiller(3);

   // Weights of buffered entries must be preserved when the buffer is flushed.
   autoFiller.Fill(1, RWeight(0.8));
   autoFiller.Fill(2, RWeight(0.9));

   auto &histogram = autoFiller.GetHist();
   EXPECT_FLOAT_EQ(histogram.GetBinContent(0), 0.8);
   EXPECT_FLOAT_EQ(histogram.GetBinContent(2), 0.9);

   // A weighted entry filled after the flush is forwarded to the histogram.
   autoFiller.Fill(1.5, RWeight(0.85));
   EXPECT_FLOAT_EQ(histogram.GetBinContent(1), 0.85);
}

TEST(RHistAutoAxisFiller, FlushError)
{
   static constexpr std::size_t NBins = 20;

   // Case 1: flushing without any entries.
   {
      RHistAutoAxisFiller<int> emptyFiller(NBins);
      EXPECT_THROW(emptyFiller.Flush(), std::runtime_error);
   }

   // Case 2: only NaN was filled, which is ignored for the axis interval.
   {
      RHistAutoAxisFiller<int> nanFiller(NBins);
      nanFiller.Fill(std::numeric_limits<double>::quiet_NaN());
      EXPECT_THROW(nanFiller.Flush(), std::runtime_error);
   }

   // Case 3: the interval limits are infinite.
   {
      RHistAutoAxisFiller<int> infFiller(NBins);
      infFiller.Fill(std::numeric_limits<double>::infinity());
      infFiller.Fill(-std::numeric_limits<double>::infinity());
      EXPECT_THROW(infFiller.Flush(), std::runtime_error);
   }

   // Case 4: identical values result in an empty interval.
   {
      RHistAutoAxisFiller<int> constantFiller(NBins);
      constantFiller.Fill(1);
      constantFiller.Fill(1);
      EXPECT_THROW(constantFiller.Flush(), std::runtime_error);
   }
}

TEST(RHistAutoAxisFiller, GetHist)
{
   static constexpr std::size_t NBins = 20;
   RHistAutoAxisFiller<int> autoFiller(NBins);

   // Two distinct values are enough to determine an axis interval.
   autoFiller.Fill(0);
   autoFiller.Fill(1);

   // The filler hands out a reference, so the histogram can be moved out of the filler that constructed it.
   RHist<int> movedHist(std::move(autoFiller.GetHist()));
}
Loading
Loading