Skip to content

Commit 6297241

Browse files
authored
Added support for cl_ext_float_atomics in CBasicTestFetchAdd with atomic_half (KhronosGroup#2350)
Related to KhronosGroup#2142. Following the work plan, this change extends CBasicTestFetchAdd with support for atomic_half. I wasn't able to test this PR completely due to missing `CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT`/`CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT` capabilities for atomic_half. I would appreciate reviewers' attention, thanks.
1 parent 79d9843 commit 6297241

File tree

2 files changed

+123
-15
lines changed

2 files changed

+123
-15
lines changed

test_conformance/c11_atomics/host_atomics.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,16 @@ template <typename AtomicType, typename CorrespondingType>
9999
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
100100
TExplicitMemoryOrderType order)
101101
{
102-
if constexpr (
102+
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
103+
{
104+
static std::mutex mx;
105+
std::lock_guard<std::mutex> lock(mx);
106+
CorrespondingType old_value = *a;
107+
*a = cl_half_from_float((cl_half_to_float(*a) + cl_half_to_float(c)),
108+
gHalfRoundingMode);
109+
return old_value;
110+
}
111+
else if constexpr (
103112
std::is_same_v<
104113
AtomicType,
105114
HOST_ATOMIC_FLOAT> || std::is_same_v<AtomicType, HOST_ATOMIC_DOUBLE>)
@@ -112,7 +121,7 @@ CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingTyp
112121
}
113122
else
114123
{
115-
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
124+
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
116125
return InterlockedExchangeAdd(a, c);
117126
#elif defined(__GNUC__)
118127
return __sync_fetch_and_add(a, c);

test_conformance/c11_atomics/test_atomics.cpp

Lines changed: 112 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ class CBasicTestLoad
417417
correct = true;
418418
for (cl_uint i = 0; i < threadCount; i++)
419419
{
420-
if constexpr (std::is_same<HostDataType, cl_half>::value)
420+
if constexpr (std::is_same_v<HostDataType, cl_half>)
421421
{
422422
HostDataType test = cl_half_from_float(static_cast<float>(i),
423423
gHalfRoundingMode);
@@ -1204,17 +1204,79 @@ class CBasicTestFetchAdd
12041204
if constexpr (
12051205
std::is_same_v<
12061206
HostDataType,
1207-
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
1207+
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
12081208
{
12091209
StartValue((HostDataType)0.0);
12101210
CBasicTestMemOrderScope<HostAtomicType,
12111211
HostDataType>::OldValueCheck(false);
12121212
}
12131213
}
1214+
// Sequentially sums the half-precision values in [begin, end) and returns the
// total as a float.
//
// The running sum is kept in a cl_half: after every addition the float result
// is converted back to half using gHalfRoundingMode, so the returned value
// includes the per-step rounding and depends on the iteration order.
//
// begin, end - iterator range whose elements are passed to cl_half_to_float.
// Returns the final cl_half accumulator converted to float (0 for an empty
// range).
template <typename Iterator> float accum_halfs(Iterator begin, Iterator end)
1215+
{
1216+
cl_half sum = 0;
1217+
for (auto it = begin; it != end; ++it)
1218+
{
1219+
// Add in float, then round the result back to half before the next step.
sum = cl_half_from_float(cl_half_to_float(sum)
1220+
+ cl_half_to_float(*it),
1221+
gHalfRoundingMode);
1222+
}
1223+
return cl_half_to_float(sum);
1224+
}
12141225
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
12151226
MTdata d) override
12161227
{
1217-
if constexpr (
1228+
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
1229+
{
1230+
if (threadCount > ref_vals.size())
1231+
{
1232+
ref_vals.resize(threadCount);
1233+
1234+
for (cl_uint i = 0; i < threadCount; i++)
1235+
ref_vals[i] = cl_half_from_float(
1236+
get_random_float(min_range, max_range, d),
1237+
gHalfRoundingMode);
1238+
1239+
memcpy(startRefValues, ref_vals.data(),
1240+
sizeof(HostDataType) * ref_vals.size());
1241+
1242+
// Estimate highest possible summation error for given set.
1243+
std::vector<float> sums;
1244+
std::sort(ref_vals.begin(), ref_vals.end(),
1245+
[](cl_half a, cl_half b) {
1246+
return cl_half_to_float(a) < cl_half_to_float(b);
1247+
});
1248+
1249+
sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
1250+
sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
1251+
1252+
std::sort(ref_vals.begin(), ref_vals.end(),
1253+
[](cl_half a, cl_half b) {
1254+
return std::abs(cl_half_to_float(a))
1255+
< std::abs(cl_half_to_float(b));
1256+
});
1257+
1258+
float precise = 0.f;
1259+
for (auto elem : ref_vals) precise += cl_half_to_float(elem);
1260+
sums.push_back(precise);
1261+
1262+
sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
1263+
sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
1264+
1265+
std::sort(sums.begin(), sums.end());
1266+
max_error = std::abs(sums.front() - sums.back());
1267+
1268+
// restore unsorted order
1269+
memcpy(ref_vals.data(), startRefValues,
1270+
sizeof(HostDataType) * ref_vals.size());
1271+
}
1272+
else
1273+
{
1274+
memcpy(startRefValues, ref_vals.data(),
1275+
sizeof(HostDataType) * threadCount);
1276+
}
1277+
return true;
1278+
}
1279+
else if constexpr (
12181280
std::is_same_v<
12191281
HostDataType,
12201282
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
@@ -1286,7 +1348,7 @@ class CBasicTestFetchAdd
12861348
if constexpr (
12871349
std::is_same_v<
12881350
HostDataType,
1289-
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
1351+
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
12901352
{
12911353
return " atomic_fetch_add" + postfix + "(&destMemory[0], ("
12921354
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1323,7 +1385,7 @@ class CBasicTestFetchAdd
13231385
if constexpr (
13241386
std::is_same_v<
13251387
HostDataType,
1326-
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
1388+
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
13271389
{
13281390
host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
13291391
MemoryOrder());
@@ -1349,7 +1411,20 @@ class CBasicTestFetchAdd
13491411
cl_uint whichDestValue) override
13501412
{
13511413
expected = StartValue();
1352-
if constexpr (
1414+
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
1415+
{
1416+
if (whichDestValue == 0)
1417+
{
1418+
for (cl_uint i = 0; i < threadCount; i++)
1419+
{
1420+
expected = cl_half_from_float(
1421+
cl_half_to_float(expected)
1422+
+ cl_half_to_float(startRefValues[i]),
1423+
gHalfRoundingMode);
1424+
}
1425+
}
1426+
}
1427+
else if constexpr (
13531428
std::is_same_v<
13541429
HostDataType,
13551430
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
@@ -1371,10 +1446,17 @@ class CBasicTestFetchAdd
13711446
const std::vector<HostAtomicType> &testValues,
13721447
cl_uint whichDestValue) override
13731448
{
1374-
if constexpr (
1449+
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
1450+
{
1451+
if (whichDestValue == 0)
1452+
return std::abs(cl_half_to_float(expected)
1453+
- cl_half_to_float(testValues[whichDestValue]))
1454+
> max_error;
1455+
}
1456+
else if constexpr (
13751457
std::is_same_v<
13761458
HostDataType,
1377-
HOST_DOUBLE> || std::is_same<HostDataType, HOST_FLOAT>::value)
1459+
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
13781460
{
13791461
if (whichDestValue == 0)
13801462
return std::abs((HostDataType)expected
@@ -1389,8 +1471,10 @@ class CBasicTestFetchAdd
13891471
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
13901472
HostAtomicType *finalValues) override
13911473
{
1392-
if (std::is_same<HostDataType, HOST_DOUBLE>::value
1393-
|| std::is_same<HostDataType, HOST_FLOAT>::value)
1474+
if constexpr (
1475+
std::is_same_v<
1476+
HostDataType,
1477+
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
13941478
{
13951479
correct = true;
13961480
for (cl_uint i = 1; i < threadCount; i++)
@@ -1413,7 +1497,17 @@ class CBasicTestFetchAdd
14131497
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
14141498
cl_command_queue queue) override
14151499
{
1416-
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
1500+
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
1501+
{
1502+
if (LocalMemory()
1503+
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
1504+
return 0; // skip test - not applicable
1505+
1506+
if (!LocalMemory()
1507+
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
1508+
return 0;
1509+
}
1510+
else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
14171511
{
14181512
if (LocalMemory()
14191513
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
@@ -1443,7 +1537,7 @@ class CBasicTestFetchAdd
14431537
if constexpr (
14441538
std::is_same_v<
14451539
HostDataType,
1446-
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
1540+
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
14471541
{
14481542
return threadCount;
14491543
}
@@ -1478,6 +1572,11 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
14781572

14791573
if (gFloatAtomicsSupported)
14801574
{
1575+
CBasicTestFetchAdd<HOST_ATOMIC_HALF, HOST_HALF> test_half(
1576+
TYPE_ATOMIC_HALF, useSVM);
1577+
EXECUTE_TEST(error,
1578+
test_half.Execute(deviceID, context, queue, num_elements));
1579+
14811580
CBasicTestFetchAdd<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
14821581
TYPE_ATOMIC_DOUBLE, useSVM);
14831582
EXECUTE_TEST(
@@ -1737,7 +1836,7 @@ class CBasicTestFetchSub
17371836
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
17381837
HostAtomicType *finalValues) override
17391838
{
1740-
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
1839+
if (std::is_same_v<HostDataType, HOST_FLOAT>)
17411840
{
17421841
correct = true;
17431842
for (cl_uint i = 1; i < threadCount; i++)

0 commit comments

Comments
 (0)