Add ULP comparison support for Float16 (#183)

kmpeng · web-flow · commit 24164117f83b · 2025-05-16T10:31:48.000-07:00
* finished implementation and tests

* rename half to float16

* change ZeroInitSize to 4 to fix failing D3D12-Intel-DXC test

* add float16 in readme

* address PR comments

* change readme float16 data
diff --git a/README.md b/README.md
@@ -64,6 +64,14 @@ Buffers:
     Format: Float32
     Stride: 4
     Data: [ 0.0, 1.0 ]
+  - Name: Out2 # Buffer where our output will go
+    Format: Float16
+    Stride: 2
+    ZeroInitSize: 4 # ZeroInitSize must be at least 4 bytes
+  - Name: Expected2 # Buffer which stores the expected result of our test
+    Format: Float16
+    Stride: 2
+    Data: [ 0x1, 0x2 ]
 Results: # Using Result can verify test values without filecheck
   - Result: Test1
     Rule: BufferFuzzy # Rule which can be used to compare Float Buffers; They are compared within a ULP range
diff --git a/lib/Support/Check.cpp b/lib/Support/Check.cpp
@@ -15,12 +15,16 @@
 
 static bool isDenorm(float F) { return std::fpclassify(F) == FP_SUBNORMAL; }
 
+// Reports whether the 16-bit pattern Val has every bit of the 0x7c00 field
+// (the Float16 exponent) set together with at least one bit of the 0x03ff
+// field (the Float16 mantissa) -- the encoding of a NaN. The sign bit is
+// ignored.
+static bool isFloat16NAN(uint16_t Val) {
+  const bool ExponentAllOnes = (Val & 0x7c00) == 0x7c00;
+  const bool MantissaNonZero = (Val & 0x03ff) != 0;
+  return ExponentAllOnes && MantissaNonZero;
+}
+
 static bool compareFloatULP(const float &FSrc, const float &FRef,
                             unsigned ULPTolerance, offloadtest::DenormMode DM) {
   if (FSrc == FRef)
     return true;
-  if (std::isnan(FSrc))
-    return std::isnan(FRef);
+  if (std::isnan(FSrc) || std::isnan(FRef))
+    return std::isnan(FRef) && std::isnan(FSrc);
   if (DM == offloadtest::DenormMode::Any) {
     // If denorm expected, output can be sign preserved zero. Otherwise output
     // should pass the regular ulp testing.
@@ -34,6 +38,18 @@ static bool compareFloatULP(const float &FSrc, const float &FRef,
   return AbsDiff <= ULPTolerance;
 }
 
+// Maps a Float16 bit pattern, which is stored as sign-magnitude, onto a signed
+// integer whose ordering follows the numeric ordering of the encoded values.
+// Subtracting two mapped values therefore gives their distance in ULPs even
+// when the signs differ. Both +0 (0x0000) and -0 (0x8000) map to 0.
+static int float16ToOrderedInt(uint16_t Bits) {
+  const int Magnitude = Bits & 0x7fff;
+  return (Bits & 0x8000) ? -Magnitude : Magnitude;
+}
+
+// Compares two Float16 values, passed as raw bit patterns, and returns true
+// when they are bit-identical, when both are NaN, or when they lie within
+// ULPTolerance units in the last place of each other. A NaN never matches a
+// non-NaN value.
+static bool compareFloat16ULP(const uint16_t &FSrc, const uint16_t &FRef,
+                              unsigned ULPTolerance) {
+  if (FSrc == FRef)
+    return true;
+  if (isFloat16NAN(FSrc) || isFloat16NAN(FRef))
+    return isFloat16NAN(FRef) && isFloat16NAN(FSrc);
+  // 16-bit floating point numbers must preserve denorms
+  // The raw bit patterns cannot be subtracted directly: for operands of
+  // opposite sign the sign bit would dominate the difference (e.g. +0 vs -0
+  // would be 32768 apart). Compare on the ordered mapping instead. The result
+  // always fits in int because each mapped value lies in [-0x7fff, 0x7fff].
+  const int Diff = float16ToOrderedInt(FSrc) - float16ToOrderedInt(FRef);
+  const unsigned AbsDiff = Diff < 0 ? -Diff : Diff;
+  return AbsDiff <= ULPTolerance;
+}
+
 static bool testBufferExact(offloadtest::Buffer *B1, offloadtest::Buffer *B2) {
   if (B1->size() != B2->size())
     return false;
@@ -50,18 +66,33 @@ static bool testBufferFuzzy(offloadtest::Buffer *B1, offloadtest::Buffer *B2,
   case offloadtest::DataFormat::Float32: {
     if (B1->Size != B2->Size)
       return false;
-    llvm::MutableArrayRef<float> Arr1(reinterpret_cast<float *>(B1->Data.get()),
-                                      B1->Size / sizeof(float));
+    llvm::ArrayRef<float> Arr1(reinterpret_cast<float *>(B1->Data.get()),
+                               B1->Size / sizeof(float));
     assert(B2->Format == offloadtest::DataFormat::Float32 &&
            "Buffer types must be the same");
-    llvm::MutableArrayRef<float> Arr2(reinterpret_cast<float *>(B2->Data.get()),
-                                      B2->Size / sizeof(float));
-    for (unsigned I = 0; I < Arr1.size(); ++I) {
+    llvm::ArrayRef<float> Arr2(reinterpret_cast<float *>(B2->Data.get()),
+                               B2->Size / sizeof(float));
+    for (unsigned I = 0, E = Arr1.size(); I < E; ++I) {
       if (!compareFloatULP(Arr1[I], Arr2[I], ULPT, DM))
         return false;
     }
     return true;
   }
+  case offloadtest::DataFormat::Float16: {
+    if (B1->Size != B2->Size)
+      return false;
+    llvm::ArrayRef<uint16_t> Arr1(reinterpret_cast<uint16_t *>(B1->Data.get()),
+                                  B1->Size / sizeof(uint16_t));
+    assert(B2->Format == offloadtest::DataFormat::Float16 &&
+           "Buffer types must be the same");
+    llvm::ArrayRef<uint16_t> Arr2(reinterpret_cast<uint16_t *>(B2->Data.get()),
+                                  B2->Size / sizeof(uint16_t));
+    for (unsigned I = 0, E = Arr1.size(); I < E; ++I) {
+      if (!compareFloat16ULP(Arr1[I], Arr2[I], ULPT))
+        return false;
+    }
+    return true;
+  }
   default:
     llvm_unreachable("Only float types are supported by the fuzzy test.");
   }
diff --git a/lib/Support/Pipeline.cpp b/lib/Support/Pipeline.cpp
@@ -13,6 +13,10 @@
 
 using namespace offloadtest;
 
+// Returns true when Format is DataFormat::Float16 or DataFormat::Float32 and
+// false for every other format.
+bool isFloatingPointFormat(DataFormat Format) {
+  switch (Format) {
+  case DataFormat::Float16:
+  case DataFormat::Float32:
+    return true;
+  default:
+    return false;
+  }
+}
+
 namespace llvm {
 namespace yaml {
 void MappingTraits<offloadtest::Pipeline>::mapping(IO &I,
@@ -41,9 +45,9 @@ void MappingTraits<offloadtest::Pipeline>::mapping(IO &I,
       R.ExpectedPtr = P.getBuffer(R.Expected);
       if (!R.ExpectedPtr)
         I.setError(Twine("Reference buffer ") + R.Expected + " not found!");
-      if (R.Rule == offloadtest::Rule::BufferFuzzy) {
-        if (R.ActualPtr->Format != offloadtest::DataFormat::Float32 ||
-            R.ExpectedPtr->Format != offloadtest::DataFormat::Float32)
+      if (R.Rule == Rule::BufferFuzzy) {
+        if (!isFloatingPointFormat(R.ActualPtr->Format) ||
+            !isFloatingPointFormat(R.ExpectedPtr->Format))
           I.setError(Twine("BufferFuzzy only accepts Float buffers"));
       }
     }
@@ -127,7 +131,7 @@ void MappingTraits<offloadtest::Buffer>::mapping(IO &I,
     DATA_CASE(Int16, int16_t)
     DATA_CASE(Int32, int32_t)
     DATA_CASE(Int64, int64_t)
-    DATA_CASE(Float16, uint16_t)
+    DATA_CASE(Float16, llvm::yaml::Hex16)
     DATA_CASE(Float32, float)
     DATA_CASE(Float64, double)
     DATA_CASE(Bool, uint32_t) // Because sizeof(bool) is 1 but HLSL represents a bool using 4 bytes.
diff --git a/test/Tools/Offloader/BufferFuzzy-16bit.test b/test/Tools/Offloader/BufferFuzzy-16bit.test
@@ -0,0 +1,115 @@
+#--- source.hlsl
+
+RWStructuredBuffer<half> Out1 : register(u0);
+RWStructuredBuffer<half> Out2 : register(u1);
+RWStructuredBuffer<half> Out3 : register(u2);
+RWStructuredBuffer<half> Out4 : register(u3);
+
+// Single-thread compute entry point: stores one half value into element 0 of
+// each output buffer. The pipeline below compares each buffer against its
+// ExpectedN counterpart with the BufferFuzzy rule.
+[numthreads(1,1,1)]
+void main() {
+  Out1[0] = (half)3.14159265; // Expected1 holds 0x4248; compared with ULPT 0
+  Out2[0] = (half)(0.0 / 0.0); // Should be NaN
+  Out3[0] = (half)5.40234375; // Expected3 holds 0x4566; compared with ULPT 1
+  Out4[0] = (half)6.40234375; // Expected4 holds 0x4665; compared with ULPT 2
+}
+
+//--- pipeline.yaml
+
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1]
+Buffers:
+  - Name: Out1
+    Format: Float16
+    Stride: 2
+    ZeroInitSize: 4
+  - Name: Expected1
+    Format: Float16
+    Stride: 2
+    Data: [ 0x4248, 0 ]
+  - Name: Out2
+    Format: Float16
+    Stride: 2
+    ZeroInitSize: 4
+  - Name: Expected2
+    Format: Float16
+    Stride: 2
+    Data: [ 0x7E00, 0 ] # Should be NaN
+  - Name: Out3
+    Format: Float16
+    Stride: 2
+    ZeroInitSize: 4
+  - Name: Expected3
+    Format: Float16
+    Stride: 2
+    Data: [ 0x4566, 0 ] # Should be 1 ulp away
+  - Name: Out4
+    Format: Float16
+    Stride: 2
+    ZeroInitSize: 4
+  - Name: Expected4
+    Format: Float16
+    Stride: 2
+    Data: [ 0x4665, 0 ] # Should be 2 ulp away
+Results:
+  - Result: Test1 # Test two values are exactly the same
+    Rule: BufferFuzzy
+    ULPT: 0 # ulp shouldn't matter for this test
+    Actual: Out1
+    Expected: Expected1
+  - Result: Test2 # Test both are NaN
+    Rule: BufferFuzzy
+    ULPT: 0 # ulp shouldn't matter for this test
+    Actual: Out2
+    Expected: Expected2
+  - Result: Test3 # Actual and Expected are not the same but they are within the ULPT
+    Rule: BufferFuzzy
+    ULPT: 1
+    Actual: Out3
+    Expected: Expected3
+  - Result: Test4 # Actual and Expected are not the same but they are within the ULPT
+    Rule: BufferFuzzy
+    ULPT: 2
+    Actual: Out4
+    Expected: Expected4
+DescriptorSets:
+  - Resources:
+    - Name: Out1
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: Out2
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+    - Name: Out3
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 2
+        Space: 0
+      VulkanBinding:
+        Binding: 2
+    - Name: Out4
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 3
+        Space: 0
+      VulkanBinding:
+        Binding: 3
+...
+#--- end
+
+# REQUIRES: Half
+
+# UNSUPPORTED: Clang-Vulkan
+# RUN: split-file %s %t
+# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/Tools/Offloader/BufferFuzzy-error-16bit.test b/test/Tools/Offloader/BufferFuzzy-error-16bit.test
@@ -0,0 +1,118 @@
+#--- source.hlsl
+
+RWStructuredBuffer<half> Out1 : register(u0);
+RWStructuredBuffer<half> Out2 : register(u1);
+
+// Single-thread compute entry point for the failure test: the values written
+// here are meant to differ from Expected1/Expected2 so that both BufferFuzzy
+// results are reported as failed (see the CHECK lines at the end of the file).
+[numthreads(1,1,1)]
+void main() {
+  Out1[0] = (half)20.3; // CHECK lines expect this to be dumped as 0x4D13
+  Out1[1] = (half)5.0; // CHECK lines expect this to be dumped as 0x4500
+  Out2[0] = (half)0.0; // Expected2 holds the denorm 0x0001; ULPT is 0
+}
+
+//--- pipeline.yaml
+
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1]
+Buffers:
+  - Name: Out1
+    Format: Float16
+    Stride: 2
+    ZeroInitSize: 4
+  - Name: Expected1
+    Format: Float16
+    Stride: 2
+    Data: [ 0x3E00, 0x4100 ]
+  - Name: Out2
+    Format: Float16
+    Stride: 2
+    ZeroInitSize: 4
+  - Name: Expected2
+    Format: Float16
+    Stride: 2
+    Data: [ 0x0001, 0 ]
+Results:
+  - Result: Test1
+    Rule: BufferFuzzy
+    ULPT: 1
+    Actual: Out1
+    Expected: Expected1
+  - Result: Test2 # Denorm value
+    Rule: BufferFuzzy
+    ULPT: 0
+    Actual: Out2
+    Expected: Expected2
+DescriptorSets:
+  - Resources:
+    - Name: Out1
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: Out2
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# REQUIRES: Half
+
+# UNSUPPORTED: Clang-Vulkan
+# RUN: split-file %s %t
+# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl
+# RUN: not %offloader %t/pipeline.yaml %t.o 2>&1 | FileCheck %s
+
+# CHECK: Test failed: Test1
+# CHECK: Expected:
+# CHECK: ---
+# CHECK: Name:            Expected1
+# CHECK: Format:          Float16
+# CHECK: Stride:          2
+# CHECK: Data:            [ 0x3E00, 0x4100 ]
+# CHECK: OutputProps:
+# CHECK: Height:          0
+# CHECK: Width:           0
+# CHECK: Depth:           0
+# CHECK:  ...
+# CHECK: Got:
+# CHECK: ---
+# CHECK: Name:            Out1
+# CHECK: Format:          Float16
+# CHECK: Stride:          2
+# CHECK: Data:            [ 0x4D13, 0x4500 ]
+# CHECK: OutputProps:
+# CHECK: Height:          0
+# CHECK: Width:           0
+# CHECK: Depth:           0
+
+# CHECK: Test failed: Test2
+# CHECK: Expected:
+# CHECK: ---
+# CHECK: Name:            Expected2
+# CHECK: Format:          Float16
+# CHECK: Stride:          2
+# CHECK: Data:            [ 0x1, 0x0 ]
+# CHECK: OutputProps:
+# CHECK: Height:          0
+# CHECK: Width:           0
+# CHECK: Depth:           0
+# CHECK:  ...
+# CHECK: Got:
+# CHECK: ---
+# CHECK: Name:            Out2
+# CHECK: Format:          Float16
+# CHECK: Stride:          2
+# CHECK: Data:            [ 0x0, 0x0 ]
+# CHECK: OutputProps:
+# CHECK: Height:          0
+# CHECK: Width:           0
+# CHECK: Depth:           0