@@ -417,7 +417,7 @@ class CBasicTestLoad
417417 correct = true ;
418418 for (cl_uint i = 0 ; i < threadCount; i++)
419419 {
420- if constexpr (std::is_same <HostDataType, cl_half>::value )
420+ if constexpr (std::is_same_v <HostDataType, cl_half>)
421421 {
422422 HostDataType test = cl_half_from_float (static_cast <float >(i),
423423 gHalfRoundingMode );
@@ -1204,17 +1204,79 @@ class CBasicTestFetchAdd
12041204 if constexpr (
12051205 std::is_same_v<
12061206 HostDataType,
1207- HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
1207+ HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
12081208 {
12091209 StartValue ((HostDataType)0.0 );
12101210 CBasicTestMemOrderScope<HostAtomicType,
12111211 HostDataType>::OldValueCheck (false );
12121212 }
12131213 }
// Sequentially sums the values in [begin, end) while emulating half-precision
// accumulation: each partial sum is computed in float and then converted back
// to cl_half with gHalfRoundingMode before the next element is added, so a
// rounding step happens after every addition.
//
// Because of that per-step rounding the result depends on the order in which
// the iterators visit the elements; GenerateRefs calls this helper with both
// forward and reverse iterators over differently sorted data for that reason.
//
// begin/end: iterator pair whose dereferenced value is passed to
//            cl_half_to_float (presumably cl_half elements - confirm for any
//            new caller).
// Returns the final accumulated value converted to float.
1214+ template <typename Iterator> float accum_halfs (Iterator begin, Iterator end)
1215+ {
// Running total kept as a cl_half; initialised from integer 0 (assumed to be
// the bit pattern of +0.0 in half precision).
1216+ cl_half sum = 0 ;
1217+ for (auto it = begin; it != end; ++it)
1218+ {
// Add in float, then round the partial sum back to half immediately.
1219+ sum = cl_half_from_float (cl_half_to_float (sum)
1220+ + cl_half_to_float (*it),
1221+ gHalfRoundingMode );
1222+ }
1223+ return cl_half_to_float (sum);
1224+ }
12141225 bool GenerateRefs (cl_uint threadCount, HostDataType *startRefValues,
12151226 MTdata d) override
12161227 {
1217- if constexpr (
1228+ if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
1229+ {
1230+ if (threadCount > ref_vals.size ())
1231+ {
1232+ ref_vals.resize (threadCount);
1233+
1234+ for (cl_uint i = 0 ; i < threadCount; i++)
1235+ ref_vals[i] = cl_half_from_float (
1236+ get_random_float (min_range, max_range, d),
1237+ gHalfRoundingMode );
1238+
1239+ memcpy (startRefValues, ref_vals.data (),
1240+ sizeof (HostDataType) * ref_vals.size ());
1241+
1242+ // Estimate highest possible summation error for given set.
1243+ std::vector<float > sums;
1244+ std::sort (ref_vals.begin (), ref_vals.end (),
1245+ [](cl_half a, cl_half b) {
1246+ return cl_half_to_float (a) < cl_half_to_float (b);
1247+ });
1248+
1249+ sums.push_back (accum_halfs (ref_vals.begin (), ref_vals.end ()));
1250+ sums.push_back (accum_halfs (ref_vals.rbegin (), ref_vals.rend ()));
1251+
1252+ std::sort (ref_vals.begin (), ref_vals.end (),
1253+ [](cl_half a, cl_half b) {
1254+ return std::abs (cl_half_to_float (a))
1255+ < std::abs (cl_half_to_float (b));
1256+ });
1257+
1258+ float precise = 0 .f ;
1259+ for (auto elem : ref_vals) precise += cl_half_to_float (elem);
1260+ sums.push_back (precise);
1261+
1262+ sums.push_back (accum_halfs (ref_vals.begin (), ref_vals.end ()));
1263+ sums.push_back (accum_halfs (ref_vals.rbegin (), ref_vals.rend ()));
1264+
1265+ std::sort (sums.begin (), sums.end ());
1266+ max_error = std::abs (sums.front () - sums.back ());
1267+
1268+ // restore unsorted order
1269+ memcpy (ref_vals.data (), startRefValues,
1270+ sizeof (HostDataType) * ref_vals.size ());
1271+ }
1272+ else
1273+ {
1274+ memcpy (startRefValues, ref_vals.data (),
1275+ sizeof (HostDataType) * threadCount);
1276+ }
1277+ return true ;
1278+ }
1279+ else if constexpr (
12181280 std::is_same_v<
12191281 HostDataType,
12201282 HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
@@ -1286,7 +1348,7 @@ class CBasicTestFetchAdd
12861348 if constexpr (
12871349 std::is_same_v<
12881350 HostDataType,
1289- HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
1351+ HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
12901352 {
12911353 return " atomic_fetch_add" + postfix + " (&destMemory[0], ("
12921354 + DataType ().AddSubOperandTypeName () + " )oldValues[tid]"
@@ -1323,7 +1385,7 @@ class CBasicTestFetchAdd
13231385 if constexpr (
13241386 std::is_same_v<
13251387 HostDataType,
1326- HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
1388+ HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
13271389 {
13281390 host_atomic_fetch_add (&destMemory[0 ], (HostDataType)oldValues[tid],
13291391 MemoryOrder ());
@@ -1349,7 +1411,20 @@ class CBasicTestFetchAdd
13491411 cl_uint whichDestValue) override
13501412 {
13511413 expected = StartValue ();
1352- if constexpr (
1414+ if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
1415+ {
1416+ if (whichDestValue == 0 )
1417+ {
1418+ for (cl_uint i = 0 ; i < threadCount; i++)
1419+ {
1420+ expected = cl_half_from_float (
1421+ cl_half_to_float (expected)
1422+ + cl_half_to_float (startRefValues[i]),
1423+ gHalfRoundingMode );
1424+ }
1425+ }
1426+ }
1427+ else if constexpr (
13531428 std::is_same_v<
13541429 HostDataType,
13551430 HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
@@ -1371,10 +1446,17 @@ class CBasicTestFetchAdd
13711446 const std::vector<HostAtomicType> &testValues,
13721447 cl_uint whichDestValue) override
13731448 {
1374- if constexpr (
1449+ if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
1450+ {
1451+ if (whichDestValue == 0 )
1452+ return std::abs (cl_half_to_float (expected)
1453+ - cl_half_to_float (testValues[whichDestValue]))
1454+ > max_error;
1455+ }
1456+ else if constexpr (
13751457 std::is_same_v<
13761458 HostDataType,
1377- HOST_DOUBLE> || std::is_same <HostDataType, HOST_FLOAT>::value )
1459+ HOST_DOUBLE> || std::is_same_v <HostDataType, HOST_FLOAT>)
13781460 {
13791461 if (whichDestValue == 0 )
13801462 return std::abs ((HostDataType)expected
@@ -1389,8 +1471,10 @@ class CBasicTestFetchAdd
13891471 bool VerifyRefs (bool &correct, cl_uint threadCount, HostDataType *refValues,
13901472 HostAtomicType *finalValues) override
13911473 {
1392- if (std::is_same<HostDataType, HOST_DOUBLE>::value
1393- || std::is_same<HostDataType, HOST_FLOAT>::value)
1474+ if constexpr (
1475+ std::is_same_v<
1476+ HostDataType,
1477+ HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
13941478 {
13951479 correct = true ;
13961480 for (cl_uint i = 1 ; i < threadCount; i++)
@@ -1413,7 +1497,17 @@ class CBasicTestFetchAdd
14131497 int ExecuteSingleTest (cl_device_id deviceID, cl_context context,
14141498 cl_command_queue queue) override
14151499 {
1416- if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
1500+ if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
1501+ {
1502+ if (LocalMemory ()
1503+ && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0 )
1504+ return 0 ; // skip test - not applicable
1505+
1506+ if (!LocalMemory ()
1507+ && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0 )
1508+ return 0 ;
1509+ }
1510+ else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
14171511 {
14181512 if (LocalMemory ()
14191513 && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0 )
@@ -1443,7 +1537,7 @@ class CBasicTestFetchAdd
14431537 if constexpr (
14441538 std::is_same_v<
14451539 HostDataType,
1446- HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
1540+ HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
14471541 {
14481542 return threadCount;
14491543 }
@@ -1478,6 +1572,11 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
14781572
14791573 if (gFloatAtomicsSupported )
14801574 {
1575+ CBasicTestFetchAdd<HOST_ATOMIC_HALF, HOST_HALF> test_half (
1576+ TYPE_ATOMIC_HALF, useSVM);
1577+ EXECUTE_TEST (error,
1578+ test_half.Execute (deviceID, context, queue, num_elements));
1579+
14811580 CBasicTestFetchAdd<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double (
14821581 TYPE_ATOMIC_DOUBLE, useSVM);
14831582 EXECUTE_TEST (
@@ -1737,7 +1836,7 @@ class CBasicTestFetchSub
17371836 bool VerifyRefs (bool &correct, cl_uint threadCount, HostDataType *refValues,
17381837 HostAtomicType *finalValues) override
17391838 {
1740- if (std::is_same <HostDataType, HOST_ATOMIC_FLOAT>::value )
1839+ if (std::is_same_v <HostDataType, HOST_FLOAT> )
17411840 {
17421841 correct = true ;
17431842 for (cl_uint i = 1 ; i < threadCount; i++)
0 commit comments