14 changes: 14 additions & 0 deletions hist/histv7/inc/ROOT/RBinWithError.hxx
@@ -5,6 +5,8 @@
#ifndef ROOT_RBinWithError
#define ROOT_RBinWithError

#include "RHistUtils.hxx"

namespace ROOT {
namespace Experimental {

@@ -45,6 +47,18 @@ struct RBinWithError final {
fSum2 += rhs.fSum2;
return *this;
}

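/// Atomically add an unweighted entry: both the sum and the sum of squared weights grow by one.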
void AtomicInc()
{
Internal::AtomicInc(&fSum);
Internal::AtomicInc(&fSum2);
}

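/// Atomically add a weighted entry: fSum grows by w and fSum2 by w * w.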
void AtomicAdd(double w)
{
Internal::AtomicAdd(&fSum, w);
Internal::AtomicAdd(&fSum2, w * w);
}
};

} // namespace Experimental
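The two new methods let several threads update the same bin without a mutex: fSum accumulates the weights and fSum2 the squared weights, so the usual sqrt(sumw2) error estimate stays available afterwards. A minimal sketch of concurrent use, assuming the members are zero-initialized as in the rest of the struct:

```cpp
#include "ROOT/RBinWithError.hxx"

#include <cmath>
#include <thread>
#include <vector>

int main()
{
   ROOT::Experimental::RBinWithError bin;
   std::vector<std::thread> threads;
   for (int t = 0; t < 4; t++) {
      threads.emplace_back([&bin] {
         for (int i = 0; i < 1000; i++) {
            bin.AtomicAdd(0.5); // lock-free update of fSum and fSum2
         }
      });
   }
   for (auto &th : threads) {
      th.join();
   }
   // 4 * 1000 * 0.5 = 2000, with error sqrt(4 * 1000 * 0.25) ~= 31.6
   double content = bin.fSum;
   double error = std::sqrt(bin.fSum2);
   return (content == 2000 && error > 0) ? 0 : 1;
}
```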
68 changes: 68 additions & 0 deletions hist/histv7/inc/ROOT/RHistEngine.hxx
@@ -326,6 +326,74 @@ public:
}
}

/// Fill an entry into the histogram using atomic instructions.
///
/// \param[in] args the arguments for each axis
/// \see Fill(const std::tuple<A...> &args)
template <typename... A>
void FillAtomic(const std::tuple<A...> &args)
{
// We could rely on RAxes::ComputeGlobalIndex to check the number of arguments, but its exception message might
// be confusing for users.
if (sizeof...(A) != GetNDimensions()) {
throw std::invalid_argument("invalid number of arguments to Fill");
}
RLinearizedIndex index = fAxes.ComputeGlobalIndexImpl<sizeof...(A)>(args);
if (index.fValid) {
assert(index.fIndex < fBinContents.size());
Internal::AtomicInc(&fBinContents[index.fIndex]);
}
}

/// Fill an entry into the histogram with a weight using atomic instructions.
///
/// This overload is not available for integral bin content types (see \ref SupportsWeightedFilling).
///
/// \param[in] args the arguments for each axis
/// \param[in] weight the weight for this entry
/// \see Fill(const std::tuple<A...> &args, RWeight weight)
template <typename... A>
void FillAtomic(const std::tuple<A...> &args, RWeight weight)
{
static_assert(SupportsWeightedFilling, "weighted filling is not supported for integral bin content types");

// We could rely on RAxes::ComputeGlobalIndex to check the number of arguments, but its exception message might
// be confusing for users.
if (sizeof...(A) != GetNDimensions()) {
throw std::invalid_argument("invalid number of arguments to Fill");
}
RLinearizedIndex index = fAxes.ComputeGlobalIndexImpl<sizeof...(A)>(args);
if (index.fValid) {
assert(index.fIndex < fBinContents.size());
Internal::AtomicAdd(&fBinContents[index.fIndex], weight.fValue);
}
}

/// Fill an entry into the histogram using atomic instructions.
///
/// \param[in] args the arguments for each axis
/// \see Fill(const A &...args)
template <typename... A>
void FillAtomic(const A &...args)
{
auto t = std::forward_as_tuple(args...);
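// If the last argument is an RWeight, split it off and fill the remaining arguments with that weight.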
if constexpr (std::is_same_v<typename Internal::LastType<A...>::type, RWeight>) {
static_assert(SupportsWeightedFilling, "weighted filling is not supported for integral bin content types");
static constexpr std::size_t N = sizeof...(A) - 1;
if (N != fAxes.GetNDimensions()) {
throw std::invalid_argument("invalid number of arguments to Fill");
}
RWeight weight = std::get<N>(t);
RLinearizedIndex index = fAxes.ComputeGlobalIndexImpl<N>(t);
if (index.fValid) {
assert(index.fIndex < fBinContents.size());
Internal::AtomicAdd(&fBinContents[index.fIndex], weight.fValue);
}
} else {
FillAtomic(t);
}
}

/// %ROOT Streamer function to throw when trying to store an object of this class.
void Streamer(TBuffer &) { throw std::runtime_error("unable to store RHistEngine"); }
};
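A brief usage sketch for the new overloads: several threads fill one shared engine through FillAtomic, both unweighted and with an RWeight. Only the FillAtomic calls are taken from the code above; the includes, the axis construction, and the RWeight constructor are assumptions about the surrounding histv7 API.

```cpp
#include "ROOT/RHistEngine.hxx"
#include "ROOT/RRegularAxis.hxx"

#include <thread>
#include <vector>

void FillConcurrently()
{
   // One regular axis with 100 bins in [0, 1); constructor details are assumed here.
   ROOT::Experimental::RHistEngine<double> engine({ROOT::Experimental::RRegularAxis(100, 0.0, 1.0)});
   std::vector<std::thread> threads;
   for (int t = 0; t < 4; t++) {
      threads.emplace_back([&engine] {
         for (int i = 0; i < 10000; i++) {
            double x = (i % 100) / 100.0;
            engine.FillAtomic(x);                                   // unweighted entry
            engine.FillAtomic(x, ROOT::Experimental::RWeight(0.5)); // weighted entry
         }
      });
   }
   for (auto &th : threads) {
      th.join();
   }
}
```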
192 changes: 192 additions & 0 deletions hist/histv7/inc/ROOT/RHistUtils.hxx
@@ -5,6 +5,12 @@
#ifndef ROOT_RHistUtils
#define ROOT_RHistUtils

#include <type_traits>

#ifdef _MSC_VER
#include <intrin.h>
#endif

namespace ROOT {
namespace Experimental {
namespace Internal {
@@ -16,6 +22,192 @@ struct LastType<T> {
using type = T;
};

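// MSVC does not provide the GCC/Clang __atomic_* builtins, so emulate the required operations with
// Interlocked intrinsics, dispatched on the operand size.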
#ifdef _MSC_VER
namespace MSVC {
template <std::size_t N>
struct AtomicOps {};

template <>
struct AtomicOps<1> {
static void Load(const void *ptr, void *ret)
{
*static_cast<char *>(ret) = __iso_volatile_load8(static_cast<const char *>(ptr));
}
static void Add(void *ptr, const void *val)
{
_InterlockedExchangeAdd8(static_cast<char *>(ptr), *static_cast<const char *>(val));
}
static bool CompareExchange(void *ptr, void *expected, const void *desired)
{
// MSVC functions have the arguments reversed...
const char expectedVal = *static_cast<char *>(expected);
const char desiredVal = *static_cast<const char *>(desired);
const char previous = _InterlockedCompareExchange8(static_cast<char *>(ptr), desiredVal, expectedVal);
if (previous == expectedVal) {
return true;
}
*static_cast<char *>(expected) = previous;
return false;
}
};

template <>
struct AtomicOps<2> {
static void Load(const void *ptr, void *ret)
{
*static_cast<short *>(ret) = __iso_volatile_load16(static_cast<const short *>(ptr));
}
static void Add(void *ptr, const void *val)
{
_InterlockedExchangeAdd16(static_cast<short *>(ptr), *static_cast<const short *>(val));
}
static bool CompareExchange(void *ptr, void *expected, const void *desired)
{
// MSVC functions have the arguments reversed...
const short expectedVal = *static_cast<short *>(expected);
const short desiredVal = *static_cast<const short *>(desired);
const short previous = _InterlockedCompareExchange16(static_cast<short *>(ptr), desiredVal, expectedVal);
if (previous == expectedVal) {
return true;
}
*static_cast<short *>(expected) = previous;
return false;
}
};

template <>
struct AtomicOps<4> {
static void Load(const void *ptr, void *ret)
{
*static_cast<int *>(ret) = __iso_volatile_load32(static_cast<const int *>(ptr));
}
static void Add(void *ptr, const void *val)
{
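// On Windows, long is 32 bits, matching the 4-byte operand size of _InterlockedExchangeAdd.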
_InterlockedExchangeAdd(static_cast<long *>(ptr), *static_cast<const long *>(val));
}
static bool CompareExchange(void *ptr, void *expected, const void *desired)
{
// MSVC functions have the arguments reversed...
const long expectedVal = *static_cast<long *>(expected);
const long desiredVal = *static_cast<const long *>(desired);
const long previous = _InterlockedCompareExchange(static_cast<long *>(ptr), desiredVal, expectedVal);
if (previous == expectedVal) {
return true;
}
*static_cast<long *>(expected) = previous;
return false;
}
};

template <>
struct AtomicOps<8> {
static void Load(const void *ptr, void *ret)
{
*static_cast<__int64 *>(ret) = __iso_volatile_load64(static_cast<const __int64 *>(ptr));
}
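// Defined out of line below: the 32-bit fallback needs AtomicAddCompareExchangeLoop, which is only declared later in this header.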
static void Add(void *ptr, const void *val);
static bool CompareExchange(void *ptr, void *expected, const void *desired)
{
// MSVC functions have the arguments reversed...
const __int64 expectedVal = *static_cast<__int64 *>(expected);
const __int64 desiredVal = *static_cast<const __int64 *>(desired);
const __int64 previous = _InterlockedCompareExchange64(static_cast<__int64 *>(ptr), desiredVal, expectedVal);
if (previous == expectedVal) {
return true;
}
*static_cast<__int64 *>(expected) = previous;
return false;
}
};
} // namespace MSVC
#endif

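// Generic wrappers: relaxed __atomic builtins on GCC/Clang, the intrinsic-based emulation above on MSVC.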
template <typename T>
void AtomicLoad(const T *ptr, T *ret)
{
#ifndef _MSC_VER
__atomic_load(ptr, ret, __ATOMIC_RELAXED);
#else
MSVC::AtomicOps<sizeof(T)>::Load(ptr, ret);
#endif
}

template <typename T>
bool AtomicCompareExchange(T *ptr, T *expected, T *desired)
{
#ifndef _MSC_VER
return __atomic_compare_exchange(ptr, expected, desired, /*weak=*/false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
#else
return MSVC::AtomicOps<sizeof(T)>::CompareExchange(ptr, expected, desired);
#endif
}

template <typename T>
void AtomicAddCompareExchangeLoop(T *ptr, T val)
{
T expected;
AtomicLoad(ptr, &expected);
T desired = expected + val;
while (!AtomicCompareExchange(ptr, &expected, &desired)) {
// On failure, expected was updated to the current value; recompute desired and try again.
desired = expected + val;
}
}

#ifdef _MSC_VER
namespace MSVC {
inline void AtomicOps<8>::Add(void *ptr, const void *val)
{
#if _WIN64
_InterlockedExchangeAdd64(static_cast<__int64 *>(ptr), *static_cast<const __int64 *>(val));
#else
AtomicAddCompareExchangeLoop(static_cast<__int64 *>(ptr), *static_cast<const __int64 *>(val));
#endif
}
} // namespace MSVC
#endif

template <typename T>
std::enable_if_t<std::is_integral_v<T>> AtomicAdd(T *ptr, T val)
{
#ifndef _MSC_VER
__atomic_fetch_add(ptr, val, __ATOMIC_RELAXED);
#else
MSVC::AtomicOps<sizeof(T)>::Add(ptr, &val);
#endif
}

template <typename T>
std::enable_if_t<std::is_floating_point_v<T>> AtomicAdd(T *ptr, T val)
{
AtomicAddCompareExchangeLoop(ptr, val);
}

// For adding a double-precision weight to a single-precision bin content type, cast the argument once before the
// compare-exchange loop.
static inline void AtomicAdd(float *ptr, double val)
{
AtomicAdd(ptr, static_cast<float>(val));
}

template <typename T>
std::enable_if_t<std::is_arithmetic_v<T>> AtomicInc(T *ptr)
{
AtomicAdd(ptr, static_cast<T>(1));
}

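// Overloads for bin content types such as RBinWithError that provide their own AtomicAdd / AtomicInc member functions.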
template <typename T, typename U>
std::enable_if_t<std::is_member_function_pointer_v<decltype(&T::AtomicAdd)>> AtomicAdd(T *ptr, const U &add)
{
ptr->AtomicAdd(add);
}

template <typename T>
std::enable_if_t<std::is_member_function_pointer_v<decltype(&T::AtomicInc)>> AtomicInc(T *ptr)
{
ptr->AtomicInc();
}

} // namespace Internal
} // namespace Experimental
} // namespace ROOT
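The floating-point path boils down to a relaxed compare-exchange loop. For illustration, the same loop written with std::atomic_ref (C++20); the header itself sticks to the compiler builtins and MSVC intrinsics, which work on plain storage and do not require C++20.

```cpp
#include <atomic>

// Illustration only: equivalent of AtomicAddCompareExchangeLoop using std::atomic_ref.
template <typename T>
void AtomicAddWithAtomicRef(T *ptr, T val)
{
   std::atomic_ref<T> ref(*ptr);
   T expected = ref.load(std::memory_order_relaxed);
   // On failure, compare_exchange_weak reloads 'expected' with the current value, so the sum is simply recomputed.
   while (!ref.compare_exchange_weak(expected, expected + val, std::memory_order_relaxed)) {
   }
}
```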
2 changes: 2 additions & 0 deletions hist/histv7/test/CMakeLists.txt
@@ -1,6 +1,8 @@
HIST_ADD_GTEST(hist_atomic hist_atomic.cxx)
HIST_ADD_GTEST(hist_axes hist_axes.cxx)
HIST_ADD_GTEST(hist_categorical hist_categorical.cxx)
HIST_ADD_GTEST(hist_engine hist_engine.cxx)
HIST_ADD_GTEST(hist_engine_atomic hist_engine_atomic.cxx)
HIST_ADD_GTEST(hist_hist hist_hist.cxx)
HIST_ADD_GTEST(hist_index hist_index.cxx)
HIST_ADD_GTEST(hist_regular hist_regular.cxx)
55 changes: 55 additions & 0 deletions hist/histv7/test/hist_atomic.cxx
@@ -0,0 +1,55 @@
#include "hist_test.hxx"

#include <cstddef>

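// Older GoogleTest versions only provide TYPED_TEST_CASE, the former name of TYPED_TEST_SUITE.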
#ifndef TYPED_TEST_SUITE
#define TYPED_TEST_SUITE TYPED_TEST_CASE
#endif

template <typename T>
class RHistAtomic : public testing::Test {};

using AtomicTypes = testing::Types<char, short, int, long, long long, float, double>;
TYPED_TEST_SUITE(RHistAtomic, AtomicTypes);

TYPED_TEST(RHistAtomic, AtomicInc)
{
TypeParam a = 1;
ROOT::Experimental::Internal::AtomicInc(&a);
EXPECT_EQ(a, 2);
}

TYPED_TEST(RHistAtomic, AtomicAdd)
{
TypeParam a = 1;
const TypeParam b = 2;
ROOT::Experimental::Internal::AtomicAdd(&a, b);
EXPECT_EQ(a, 3);
}

// AtomicInc is implemented in terms of AtomicAdd, so it's sufficient to stress one of them.
TYPED_TEST(RHistAtomic, StressAtomicAdd)
{
static constexpr TypeParam Addend = 1;
static constexpr std::size_t NThreads = 4;
// Reduce number of additions for char to avoid overflow.
static constexpr std::size_t NAddsPerThread = sizeof(TypeParam) == 1 ? 20 : 8000;
static constexpr std::size_t NAdds = NThreads * NAddsPerThread;

TypeParam a = 0;
StressInParallel(NThreads, [&] {
for (std::size_t i = 0; i < NAddsPerThread; i++) {
ROOT::Experimental::Internal::AtomicAdd(&a, Addend);
}
});

EXPECT_EQ(a, NAdds * Addend);
}

TEST(AtomicAdd, FloatDouble)
{
float a = 1;
const double b = 2;
ROOT::Experimental::Internal::AtomicAdd(&a, b);
EXPECT_EQ(a, 3);
}
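StressInParallel comes from hist_test.hxx and is not shown in this diff; a hypothetical sketch of such a helper, assuming it simply runs the callable on the requested number of threads and joins them:

```cpp
#include <cstddef>
#include <thread>
#include <vector>

// Hypothetical stand-in for the StressInParallel helper used above.
template <typename F>
void StressInParallelSketch(std::size_t nThreads, F &&func)
{
   std::vector<std::thread> threads;
   for (std::size_t i = 0; i < nThreads; i++) {
      threads.emplace_back(func);
   }
   for (auto &t : threads) {
      t.join();
   }
}
```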