Skip to content

Commit d9fd616

Browse files
committed
[hist] Implement RHistStatus::Compute{Skewnewss,Kurtosis}
1 parent d252045 commit d9fd616

File tree

2 files changed

+104
-0
lines changed

2 files changed

+104
-0
lines changed

hist/histv7/inc/ROOT/RHistStats.hxx

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,70 @@ public:
158158
/// \return the standard deviation of unbinned values
159159
double ComputeStdDev(std::size_t dim = 0) const { return std::sqrt(ComputeVariance(dim)); }
160160

161+
// clang-format off
162+
/// Compute the skewness of unbinned values.
163+
///
164+
/// The skewness is the third standardized moment:
165+
/// \f[
166+
/// E\left[\left(\frac{X - \mu}{\sigma}\right)^3\right]
167+
/// \f]
168+
/// With support for weighted filling and after some rewriting, it is computed as:
169+
/// \f[
170+
/// \frac{\frac{\sum w_i \cdot x_i^3}{\sum w_i} - 3 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu + 2 \cdot \mu^3}{\sigma^3}
171+
/// \f]
172+
///
173+
/// \param[in] dim the dimension index, starting at 0
174+
/// \return the skewness of unbinned values
175+
// clang-format on
176+
double ComputeSkewness(std::size_t dim = 0) const
177+
{
178+
// First get the statistics, which includes checking the argument.
179+
auto &stats = fDimensionStats.at(dim);
180+
if (fSumW == 0) {
181+
return 0;
182+
}
183+
double mean = ComputeMean(dim);
184+
double var = ComputeVariance(dim);
185+
double EWX3 = stats.fSumWX3 / fSumW;
186+
double EWX2 = stats.fSumWX2 / fSumW;
187+
return (EWX3 - 3 * EWX2 * mean + 2 * mean * mean * mean) / std::pow(var, 1.5);
188+
}
189+
190+
// clang-format off
191+
/// Compute the (excess) kurtosis of unbinned values.
192+
///
193+
/// The kurtosis is based on the fourth standardized moment:
194+
/// \f[
195+
/// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right]
196+
/// \f]
197+
/// The excess kurtosis subtracts 3 from the standardized moment to have a value of 0 for a normal distribution:
198+
/// \f[
199+
/// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right] - 3
200+
/// \f]
201+
///
202+
/// With support for weighted filling and after some rewriting, the (excess kurtosis) is computed as:
203+
/// \f[
204+
/// \frac{\frac{\sum w_i \cdot x_i^4}{\sum w_i} - 4 \cdot \frac{\sum w_i \cdot x_i^3}{\sum w_i} \cdot \mu + 6 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu^2 - 3 \cdot \mu^4}{\sigma^4} - 3
205+
/// \f]
206+
///
207+
/// \param[in] dim the dimension index, starting at 0
208+
/// \return the (excess) kurtosis of unbinned values
209+
// clang-format on
210+
double ComputeKurtosis(std::size_t dim = 0) const
211+
{
212+
// First get the statistics, which includes checking the argument.
213+
auto &stats = fDimensionStats.at(dim);
214+
if (fSumW == 0) {
215+
return 0;
216+
}
217+
double mean = ComputeMean(dim);
218+
double var = ComputeVariance(dim);
219+
double EWX4 = stats.fSumWX4 / fSumW;
220+
double EWX3 = stats.fSumWX3 / fSumW;
221+
double EWX2 = stats.fSumWX2 / fSumW;
222+
return (EWX4 - 4 * EWX3 * mean + 6 * EWX2 * mean * mean - 3 * mean * mean * mean * mean) / (var * var) - 3;
223+
}
224+
161225
private:
162226
template <std::size_t I, typename... A>
163227
void FillImpl(const std::tuple<A...> &args)

hist/histv7/test/hist_stats.cxx

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,46 @@ TEST(RHistStats, ComputeStdDev)
122122
EXPECT_DOUBLE_EQ(stats.ComputeStdDev(2), std::sqrt(12881.05));
123123
}
124124

125+
TEST(RHistStats, ComputeSkewness)
126+
{
127+
RHistStats stats(3);
128+
ASSERT_EQ(stats.GetNEntries(), 0);
129+
EXPECT_EQ(stats.ComputeSkewness(/*=0*/), 0);
130+
EXPECT_EQ(stats.ComputeSkewness(1), 0);
131+
EXPECT_EQ(stats.ComputeSkewness(2), 0);
132+
133+
static constexpr std::size_t Entries = 20;
134+
for (std::size_t i = 0; i < Entries; i++) {
135+
stats.Fill(i, 2 * i, i * i);
136+
}
137+
138+
ASSERT_EQ(stats.GetNEntries(), Entries);
139+
EXPECT_DOUBLE_EQ(stats.ComputeSkewness(/*=0*/), 0);
140+
EXPECT_DOUBLE_EQ(stats.ComputeSkewness(1), 0);
141+
// Cross-checked with TH1 and SciPy, numerical differences with EXPECT_DOUBLE_EQ
142+
EXPECT_FLOAT_EQ(stats.ComputeSkewness(2), 0.66125456);
143+
}
144+
145+
TEST(RHistStats, ComputeKurtosis)
146+
{
147+
RHistStats stats(3);
148+
ASSERT_EQ(stats.GetNEntries(), 0);
149+
EXPECT_EQ(stats.ComputeKurtosis(/*=0*/), 0);
150+
EXPECT_EQ(stats.ComputeKurtosis(1), 0);
151+
EXPECT_EQ(stats.ComputeKurtosis(2), 0);
152+
153+
static constexpr std::size_t Entries = 20;
154+
for (std::size_t i = 0; i < Entries; i++) {
155+
stats.Fill(i, 2 * i, i * i);
156+
}
157+
158+
ASSERT_EQ(stats.GetNEntries(), Entries);
159+
// Cross-checked with TH1 and SciPy, numerical differences with EXPECT_DOUBLE_EQ
160+
EXPECT_FLOAT_EQ(stats.ComputeKurtosis(/*=0*/), -1.2060150);
161+
EXPECT_FLOAT_EQ(stats.ComputeKurtosis(1), -1.2060150);
162+
EXPECT_FLOAT_EQ(stats.ComputeKurtosis(2), -0.84198253);
163+
}
164+
125165
TEST(RHistStats, FillInvalidNumberOfArguments)
126166
{
127167
RHistStats stats1(1);

0 commit comments

Comments
 (0)