[hist] Implement RHistStats::Compute{Skewness,Kurtosis}

hahnjo · hahnjo · commit d9fd6162b24d · 2025-09-16T15:33:03.000+02:00
diff --git a/hist/histv7/inc/ROOT/RHistStats.hxx b/hist/histv7/inc/ROOT/RHistStats.hxx
@@ -158,6 +158,70 @@ public:
    /// \return the standard deviation of unbinned values
    double ComputeStdDev(std::size_t dim = 0) const { return std::sqrt(ComputeVariance(dim)); }
 
+   // clang-format off
+   /// Compute the skewness of unbinned values.
+   ///
+   /// The skewness is the third standardized moment:
+   /// \f[
+   /// E\left[\left(\frac{X - \mu}{\sigma}\right)^3\right]
+   /// \f]
+   /// With support for weighted filling and after some rewriting, it is computed as:
+   /// \f[
+   /// \frac{\frac{\sum w_i \cdot x_i^3}{\sum w_i} - 3 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu + 2 \cdot \mu^3}{\sigma^3}
+   /// \f]
+   ///
+   /// \param[in] dim the dimension index, starting at 0
+   /// \return the skewness of unbinned values
+   // clang-format on
+   double ComputeSkewness(std::size_t dim = 0) const
+   {
+      // Fetch the per-dimension sums up front; this lookup also validates the dim argument.
+      const auto &dimStats = fDimensionStats.at(dim);
+      if (fSumW == 0) {
+         return 0;
+      }
+      const double mu = ComputeMean(dim);
+      const double raw2 = dimStats.fSumWX2 / fSumW;
+      const double raw3 = dimStats.fSumWX3 / fSumW;
+      const double central3 = raw3 - 3 * raw2 * mu + 2 * mu * mu * mu;
+      return central3 / std::pow(ComputeVariance(dim), 1.5); // NOTE(review): zero variance is unguarded - confirm
+   }
+
+   // clang-format off
+   /// Compute the (excess) kurtosis of unbinned values.
+   ///
+   /// The kurtosis is based on the fourth standardized moment:
+   /// \f[
+   /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right]
+   /// \f]
+   /// The excess kurtosis subtracts 3 from the standardized moment to have a value of 0 for a normal distribution:
+   /// \f[
+   /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right] - 3
+   /// \f]
+   ///
+   /// With support for weighted filling and after some rewriting, the (excess) kurtosis is computed as:
+   /// \f[
+   /// \frac{\frac{\sum w_i \cdot x_i^4}{\sum w_i} - 4 \cdot \frac{\sum w_i \cdot x_i^3}{\sum w_i} \cdot \mu + 6 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu^2 - 3 \cdot \mu^4}{\sigma^4} - 3
+   /// \f]
+   ///
+   /// \param[in] dim the dimension index, starting at 0
+   /// \return the (excess) kurtosis of unbinned values
+   // clang-format on
+   double ComputeKurtosis(std::size_t dim = 0) const
+   {
+      // Fetch the per-dimension sums up front; this lookup also validates the dim argument.
+      const auto &dimStats = fDimensionStats.at(dim);
+      if (fSumW == 0) {
+         return 0;
+      }
+      const double mu = ComputeMean(dim);
+      const double var = ComputeVariance(dim); // NOTE(review): var == 0 is not guarded below - confirm
+      const double raw2 = dimStats.fSumWX2 / fSumW;
+      const double raw3 = dimStats.fSumWX3 / fSumW;
+      const double raw4 = dimStats.fSumWX4 / fSumW;
+      return (raw4 - 4 * raw3 * mu + 6 * raw2 * mu * mu - 3 * mu * mu * mu * mu) / (var * var) - 3;
+   }
+
 private:
    template <std::size_t I, typename... A>
    void FillImpl(const std::tuple<A...> &args)
diff --git a/hist/histv7/test/hist_stats.cxx b/hist/histv7/test/hist_stats.cxx
@@ -122,6 +122,46 @@ TEST(RHistStats, ComputeStdDev)
    EXPECT_DOUBLE_EQ(stats.ComputeStdDev(2), std::sqrt(12881.05));
 }
 
+TEST(RHistStats, ComputeSkewness)
+{
+   RHistStats stats(3);
+   ASSERT_EQ(stats.GetNEntries(), 0);
+   EXPECT_EQ(stats.ComputeSkewness(/*=0*/), 0); // without any entries, the skewness is reported as 0
+   EXPECT_EQ(stats.ComputeSkewness(1), 0);
+   EXPECT_EQ(stats.ComputeSkewness(2), 0);
+
+   static constexpr std::size_t Entries = 20;
+   for (std::size_t i = 0; i < Entries; i++) {
+      stats.Fill(i, 2 * i, i * i); // dimensions 0 and 1 are linear in i, dimension 2 is quadratic
+   }
+
+   ASSERT_EQ(stats.GetNEntries(), Entries);
+   EXPECT_DOUBLE_EQ(stats.ComputeSkewness(/*=0*/), 0); // evenly spaced values are symmetric around their mean
+   EXPECT_DOUBLE_EQ(stats.ComputeSkewness(1), 0);
+   // Cross-checked with TH1 and SciPy; compared as float since EXPECT_DOUBLE_EQ fails on numerical differences
+   EXPECT_FLOAT_EQ(stats.ComputeSkewness(2), 0.66125456);
+}
+
+TEST(RHistStats, ComputeKurtosis)
+{
+   RHistStats stats(3);
+   ASSERT_EQ(stats.GetNEntries(), 0);
+   EXPECT_EQ(stats.ComputeKurtosis(/*=0*/), 0); // without any entries, the kurtosis is reported as 0
+   EXPECT_EQ(stats.ComputeKurtosis(1), 0);
+   EXPECT_EQ(stats.ComputeKurtosis(2), 0);
+
+   static constexpr std::size_t Entries = 20;
+   for (std::size_t i = 0; i < Entries; i++) {
+      stats.Fill(i, 2 * i, i * i); // dimensions 0 and 1 are linear in i, dimension 2 is quadratic
+   }
+
+   ASSERT_EQ(stats.GetNEntries(), Entries);
+   // Cross-checked with TH1 and SciPy; compared as float since EXPECT_DOUBLE_EQ fails on numerical differences
+   EXPECT_FLOAT_EQ(stats.ComputeKurtosis(/*=0*/), -1.2060150);
+   EXPECT_FLOAT_EQ(stats.ComputeKurtosis(1), -1.2060150); // dimension 1 holds dimension 0's values scaled by 2
+   EXPECT_FLOAT_EQ(stats.ComputeKurtosis(2), -0.84198253);
+}
+
 TEST(RHistStats, FillInvalidNumberOfArguments)
 {
    RHistStats stats1(1);