-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[HLSL] [DirectX] Invert the result of firstbithigh
#166419
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-directx @llvm/pr-subscribers-hlsl Author: Deric C. (Icohedron) ChangesFixes #145752 This PR inverts the result of Although the original issue is labeled with [DirectX], I assume the result of This Clang codegen of %FirstbitSHi = call i32 @<!-- -->dx.op.unaryBits.i32(i32 34, i32 %i)
%1 = sub i32 31, %FirstbitSHi
%2 = icmp eq i32 %FirstbitSHi, -1 ; not emitted by clang
%3 = select i1 %2, i32 -1, i32 %1 ; not emitted by clangThe comparison with Full diff: https://github.com/llvm/llvm-project/pull/166419.diff 4 Files Affected:
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index 4c5861c2c5f9d..c0914914a1262 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -1073,78 +1073,6 @@ float3 f16tof32(uint3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
float4 f16tof32(uint4);
-//===----------------------------------------------------------------------===//
-// firstbithigh builtins
-//===----------------------------------------------------------------------===//
-
-/// \fn T firstbithigh(T Val)
-/// \brief Returns the location of the first set bit starting from the highest
-/// order bit and working downward, per component.
-/// \param Val the input value.
-
-#ifdef __HLSL_ENABLE_16_BIT
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(int16_t);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(int16_t2);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(int16_t3);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(int16_t4);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(uint16_t);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(uint16_t2);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(uint16_t3);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(uint16_t4);
-#endif
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(int);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(int2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(int3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(int4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(uint);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(uint2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(uint3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(uint4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(int64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(int64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(int64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(int64_t4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(uint64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(uint64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(uint64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(uint64_t4);
-
//===----------------------------------------------------------------------===//
// firstbitlow builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
index c877234479ad1..8560c75016b4f 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
@@ -148,6 +148,15 @@ template <typename T> constexpr T ldexp_impl(T X, T Exp) {
return exp2(Exp) * X;
}
+template <typename T, int Bitwidth> constexpr uint firstbithigh_impl(T X) {
+ return (Bitwidth - 1) - __builtin_hlsl_elementwise_firstbithigh(X);
+}
+
+template <typename T, int N, int Bitwidth>
+constexpr vector<uint, N> firstbithigh_impl(vector<T, N> X) {
+ return (Bitwidth - 1) - __builtin_hlsl_elementwise_firstbithigh(X);
+}
+
} // namespace __detail
} // namespace hlsl
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 5ba5bfb9abde0..192c3a2c974d9 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -261,6 +261,67 @@ faceforward(__detail::HLSL_FIXED_VECTOR<float, L> N,
return __detail::faceforward_impl(N, I, Ng);
}
+//===----------------------------------------------------------------------===//
+// firstbithigh builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T firstbithigh(T Val)
+/// \brief Returns the location of the first set bit starting from the lowest
+/// order bit and working upward, per component.
+/// \param Val the input value.
+
+#ifdef __HLSL_ENABLE_16_BIT
+
+template <typename T>
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+const inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value ||
+ __detail::is_same<uint16_t, T>::value,
+ uint> firstbithigh(T X) {
+ return __detail::firstbithigh_impl<T, 16>(X);
+}
+
+template <typename T, int N>
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+const
+ inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value ||
+ __detail::is_same<uint16_t, T>::value,
+ vector<uint, N>> firstbithigh(vector<T, N> X) {
+ return __detail::firstbithigh_impl<T, N, 16>(X);
+}
+
+#endif
+
+template <typename T>
+const inline __detail::enable_if_t<
+ __detail::is_same<int, T>::value || __detail::is_same<uint, T>::value, uint>
+firstbithigh(T X) {
+ return __detail::firstbithigh_impl<T, 32>(X);
+}
+
+template <typename T, int N>
+const inline __detail::enable_if_t<__detail::is_same<int, T>::value ||
+ __detail::is_same<uint, T>::value,
+ vector<uint, N>>
+firstbithigh(vector<T, N> X) {
+ return __detail::firstbithigh_impl<T, N, 32>(X);
+}
+
+template <typename T>
+const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value ||
+ __detail::is_same<uint64_t, T>::value,
+ uint>
+firstbithigh(T X) {
+ return __detail::firstbithigh_impl<T, 64>(X);
+}
+
+template <typename T, int N>
+const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value ||
+ __detail::is_same<uint64_t, T>::value,
+ vector<uint, N>>
+firstbithigh(vector<T, N> X) {
+ return __detail::firstbithigh_impl<T, N, 64>(X);
+}
+
//===----------------------------------------------------------------------===//
// fmod builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
index 368d652a6f779..c8fa942fa81ff 100644
--- a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
@@ -1,160 +1,184 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type \
-// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=dx
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type -fnative-int16-type \
-// RUN: -emit-llvm -disable-llvm-passes \
-// RUN: -o - | FileCheck %s -DTARGET=spv
+// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=spv
#ifdef __HLSL_ENABLE_16_BIT
// CHECK-LABEL: test_firstbithigh_ushort
-// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i16
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i16
+// CHECK: sub i32 15, [[FBH]]
uint test_firstbithigh_ushort(uint16_t p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ushort2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16
+// CHECK: sub <2 x i32> splat (i32 15), [[FBH]]
uint2 test_firstbithigh_ushort2(uint16_t2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ushort3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16
+// CHECK: sub <3 x i32> splat (i32 15), [[FBH]]
uint3 test_firstbithigh_ushort3(uint16_t3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ushort4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16
+// CHECK: sub <4 x i32> splat (i32 15), [[FBH]]
uint4 test_firstbithigh_ushort4(uint16_t4 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_short
-// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i16
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i16
+// CHECK: sub i32 15, [[FBH]]
uint test_firstbithigh_short(int16_t p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_short2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16
+// CHECK: sub <2 x i32> splat (i32 15), [[FBH]]
uint2 test_firstbithigh_short2(int16_t2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_short3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16
+// CHECK: sub <3 x i32> splat (i32 15), [[FBH]]
uint3 test_firstbithigh_short3(int16_t3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_short4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16
+// CHECK: sub <4 x i32> splat (i32 15), [[FBH]]
uint4 test_firstbithigh_short4(int16_t4 p0) {
return firstbithigh(p0);
}
#endif // __HLSL_ENABLE_16_BIT
// CHECK-LABEL: test_firstbithigh_uint
-// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i32
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i32
+// CHECK: sub i32 31, [[FBH]]
uint test_firstbithigh_uint(uint p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_uint2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32
+// CHECK: sub <2 x i32> splat (i32 31), [[FBH]]
uint2 test_firstbithigh_uint2(uint2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_uint3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32
+// CHECK: sub <3 x i32> splat (i32 31), [[FBH]]
uint3 test_firstbithigh_uint3(uint3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_uint4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32
+// CHECK: sub <4 x i32> splat (i32 31), [[FBH]]
uint4 test_firstbithigh_uint4(uint4 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ulong
-// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i64
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i64
+// CHECK: sub i32 63, [[FBH]]
uint test_firstbithigh_ulong(uint64_t p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ulong2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64
+// CHECK: sub <2 x i32> splat (i32 63), [[FBH]]
uint2 test_firstbithigh_ulong2(uint64_t2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ulong3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64
+// CHECK: sub <3 x i32> splat (i32 63), [[FBH]]
uint3 test_firstbithigh_ulong3(uint64_t3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ulong4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64
+// CHECK: sub <4 x i32> splat (i32 63), [[FBH]]
uint4 test_firstbithigh_ulong4(uint64_t4 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_int
-// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i32
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i32
+// CHECK: sub i32 31, [[FBH]]
uint test_firstbithigh_int(int p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_int2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32
+// CHECK: sub <2 x i32> splat (i32 31), [[FBH]]
uint2 test_firstbithigh_int2(int2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_int3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32
+// CHECK: sub <3 x i32> splat (i32 31), [[FBH]]
uint3 test_firstbithigh_int3(int3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_int4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32
+// CHECK: sub <4 x i32> splat (i32 31), [[FBH]]
uint4 test_firstbithigh_int4(int4 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_long
-// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i64
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i64
+// CHECK: sub i32 63, [[FBH]]
uint test_firstbithigh_long(int64_t p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_long2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64
+// CHECK: sub <2 x i32> splat (i32 63), [[FBH]]
uint2 test_firstbithigh_long2(int64_t2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_long3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64
+// CHECK: sub <3 x i32> splat (i32 63), [[FBH]]
uint3 test_firstbithigh_long3(int64_t3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_long4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64
+// CHECK: sub <4 x i32> splat (i32 63), [[FBH]]
uint4 test_firstbithigh_long4(int64_t4 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_upcast
// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32(<4 x i32> %{{.*}})
-// CHECK: [[CONV:%.*]] = zext <4 x i32> [[FBH]] to <4 x i64>
+// CHECK: [[SUB:%.*]] = sub <4 x i32> splat (i32 31), [[FBH]]
+// CHECK: [[CONV:%.*]] = zext <4 x i32> [[SUB]] to <4 x i64>
// CHECK: ret <4 x i64> [[CONV]]
uint64_t4 test_firstbithigh_upcast(uint4 p0) {
return firstbithigh(p0);
|
This comment was marked as resolved.
This comment was marked as resolved.
This comment was marked as resolved.
This comment was marked as resolved.
|
Here is some further reading/context that we want to make sure we've applied here microsoft/DirectXShaderCompiler#169 |
firstbithighfirstbithigh
This comment was marked as resolved.
This comment was marked as resolved.
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
farzonl
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Have a readability nit for the test. Don't feel strongly about it but not a huge fan of CHECK-NOT
Fixes llvm#145752 This PR inverts the result of `firstbithigh` when targeting DirectX by subtracting it from integer bitwidth - 1 to match the result from DXC. The result is not inverted if `firstbithigh` returned -1 or when targeting a backend other than DirectX.
Fixes #145752
This PR inverts the result of
firstbithighwhen targeting DirectX by subtracting it from integer bitwidth - 1 to match the result from DXC.The result is not inverted if
firstbithighreturned -1 or when targeting a backend other than DirectX.