Skip to content

Commit 49021b3

Browse files
committed
[HLSL] Implement elementwise firstbitlow builtin
1 parent b904166 commit 49021b3

File tree

16 files changed, with 617 additions and 6 deletions.

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4810,6 +4810,12 @@ def HLSLFirstBitHigh : LangBuiltin<"HLSL_LANG"> {
48104810
let Prototype = "void(...)";
48114811
}
48124812

4813+
def HLSLFirstBitLow : LangBuiltin<"HLSL_LANG"> {
4814+
let Spellings = ["__builtin_hlsl_elementwise_firstbitlow"];
4815+
let Attributes = [NoThrow, Const];
4816+
let Prototype = "void(...)";
4817+
}
4818+
48134819
def HLSLFrac : LangBuiltin<"HLSL_LANG"> {
48144820
let Spellings = ["__builtin_hlsl_elementwise_frac"];
48154821
let Attributes = [NoThrow, Const];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18956,14 +18956,21 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
1895618956
"hlsl.dot4add.u8packed");
1895718957
}
1895818958
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
18959-
1896018959
Value *X = EmitScalarExpr(E->getArg(0));
1896118960

1896218961
return Builder.CreateIntrinsic(
1896318962
/*ReturnType=*/ConvertType(E->getType()),
1896418963
getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
1896518964
ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
1896618965
}
18966+
case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
18967+
Value *X = EmitScalarExpr(E->getArg(0));
18968+
18969+
return Builder.CreateIntrinsic(
18970+
/*ReturnType=*/ConvertType(E->getType()),
18971+
CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
18972+
nullptr, "hlsl.firstbitlow");
18973+
}
1896718974
case Builtin::BI__builtin_hlsl_lerp: {
1896818975
Value *X = EmitScalarExpr(E->getArg(0));
1896918976
Value *Y = EmitScalarExpr(E->getArg(1));

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ class CGHLSLRuntime {
9696
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
9797
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
9898
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
99+
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitLow, firstbitlow)
99100
GENERATE_HLSL_INTRINSIC_FUNCTION(NClamp, nclamp)
100101
GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp)
101102
GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp)

clang/lib/Headers/hlsl/hlsl_intrinsics.h

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,6 +1086,78 @@ uint3 firstbithigh(uint64_t3);
10861086
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
10871087
uint4 firstbithigh(uint64_t4);
10881088

1089+
//===----------------------------------------------------------------------===//
1090+
// firstbitlow builtins
1091+
//===----------------------------------------------------------------------===//
1092+
1093+
/// \fn T firstbitlow(T Val)
1094+
/// \brief Returns the location of the first set bit starting from the lowest
1095+
/// order bit and working upward, per component. The result is always
/// unsigned: uint for scalars, or a uint vector of the same width as Val.
1096+
/// \param Val the input value.
1097+
1098+
#ifdef __HLSL_ENABLE_16_BIT
1099+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1100+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1101+
uint firstbitlow(int16_t);
1102+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1103+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1104+
uint2 firstbitlow(int16_t2);
1105+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1106+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1107+
uint3 firstbitlow(int16_t3);
1108+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1109+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1110+
uint4 firstbitlow(int16_t4);
1111+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1112+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1113+
uint firstbitlow(uint16_t);
1114+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1115+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1116+
uint2 firstbitlow(uint16_t2);
1117+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1118+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1119+
uint3 firstbitlow(uint16_t3);
1120+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1121+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1122+
uint4 firstbitlow(uint16_t4);
1123+
#endif
1124+
1125+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1126+
uint firstbitlow(int);
1127+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1128+
uint2 firstbitlow(int2);
1129+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1130+
uint3 firstbitlow(int3);
1131+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1132+
uint4 firstbitlow(int4);
1133+
1134+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1135+
uint firstbitlow(uint);
1136+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1137+
uint2 firstbitlow(uint2);
1138+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1139+
uint3 firstbitlow(uint3);
1140+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1141+
uint4 firstbitlow(uint4);
1142+
1143+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1144+
uint firstbitlow(int64_t);
1145+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1146+
uint2 firstbitlow(int64_t2);
1147+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1148+
uint3 firstbitlow(int64_t3);
1149+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1150+
uint4 firstbitlow(int64_t4);
1151+
1152+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1153+
uint firstbitlow(uint64_t);
1154+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1155+
uint2 firstbitlow(uint64_t2);
1156+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1157+
uint3 firstbitlow(uint64_t3);
1158+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1159+
uint4 firstbitlow(uint64_t4);
1160+
10891161
//===----------------------------------------------------------------------===//
10901162
// floor builtins
10911163
//===----------------------------------------------------------------------===//

clang/lib/Sema/SemaHLSL.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1947,7 +1947,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
19471947
return true;
19481948
break;
19491949
}
1950-
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
1950+
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh:
1951+
case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
19511952
if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall))
19521953
return true;
19531954

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
2+
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
3+
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx
4+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
5+
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
6+
// RUN: -emit-llvm -disable-llvm-passes \
7+
// RUN: -o - | FileCheck %s -DTARGET=spv
8+
9+
#ifdef __HLSL_ENABLE_16_BIT
10+
// CHECK-LABEL: test_firstbitlow_ushort
11+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
12+
uint test_firstbitlow_ushort(uint16_t p0) {
13+
return firstbitlow(p0);
14+
}
15+
16+
// CHECK-LABEL: test_firstbitlow_ushort2
17+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
18+
uint2 test_firstbitlow_ushort2(uint16_t2 p0) {
19+
return firstbitlow(p0);
20+
}
21+
22+
// CHECK-LABEL: test_firstbitlow_ushort3
23+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
24+
uint3 test_firstbitlow_ushort3(uint16_t3 p0) {
25+
return firstbitlow(p0);
26+
}
27+
28+
// CHECK-LABEL: test_firstbitlow_ushort4
29+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
30+
uint4 test_firstbitlow_ushort4(uint16_t4 p0) {
31+
return firstbitlow(p0);
32+
}
33+
34+
// CHECK-LABEL: test_firstbitlow_short
35+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
36+
uint test_firstbitlow_short(int16_t p0) {
37+
return firstbitlow(p0);
38+
}
39+
40+
// CHECK-LABEL: test_firstbitlow_short2
41+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
42+
uint2 test_firstbitlow_short2(int16_t2 p0) {
43+
return firstbitlow(p0);
44+
}
45+
46+
// CHECK-LABEL: test_firstbitlow_short3
47+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
48+
uint3 test_firstbitlow_short3(int16_t3 p0) {
49+
return firstbitlow(p0);
50+
}
51+
52+
// CHECK-LABEL: test_firstbitlow_short4
53+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
54+
uint4 test_firstbitlow_short4(int16_t4 p0) {
55+
return firstbitlow(p0);
56+
}
57+
#endif // __HLSL_ENABLE_16_BIT
58+
59+
// CHECK-LABEL: test_firstbitlow_uint
60+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
61+
uint test_firstbitlow_uint(uint p0) {
62+
return firstbitlow(p0);
63+
}
64+
65+
// CHECK-LABEL: test_firstbitlow_uint2
66+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
67+
uint2 test_firstbitlow_uint2(uint2 p0) {
68+
return firstbitlow(p0);
69+
}
70+
71+
// CHECK-LABEL: test_firstbitlow_uint3
72+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
73+
uint3 test_firstbitlow_uint3(uint3 p0) {
74+
return firstbitlow(p0);
75+
}
76+
77+
// CHECK-LABEL: test_firstbitlow_uint4
78+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
79+
uint4 test_firstbitlow_uint4(uint4 p0) {
80+
return firstbitlow(p0);
81+
}
82+
83+
// CHECK-LABEL: test_firstbitlow_ulong
84+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
85+
uint test_firstbitlow_ulong(uint64_t p0) {
86+
return firstbitlow(p0);
87+
}
88+
89+
// CHECK-LABEL: test_firstbitlow_ulong2
90+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
91+
uint2 test_firstbitlow_ulong2(uint64_t2 p0) {
92+
return firstbitlow(p0);
93+
}
94+
95+
// CHECK-LABEL: test_firstbitlow_ulong3
96+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
97+
uint3 test_firstbitlow_ulong3(uint64_t3 p0) {
98+
return firstbitlow(p0);
99+
}
100+
101+
// CHECK-LABEL: test_firstbitlow_ulong4
102+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
103+
uint4 test_firstbitlow_ulong4(uint64_t4 p0) {
104+
return firstbitlow(p0);
105+
}
106+
107+
// CHECK-LABEL: test_firstbitlow_int
108+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
109+
uint test_firstbitlow_int(int p0) {
110+
return firstbitlow(p0);
111+
}
112+
113+
// CHECK-LABEL: test_firstbitlow_int2
114+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
115+
uint2 test_firstbitlow_int2(int2 p0) {
116+
return firstbitlow(p0);
117+
}
118+
119+
// CHECK-LABEL: test_firstbitlow_int3
120+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
121+
uint3 test_firstbitlow_int3(int3 p0) {
122+
return firstbitlow(p0);
123+
}
124+
125+
// CHECK-LABEL: test_firstbitlow_int4
126+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
127+
uint4 test_firstbitlow_int4(int4 p0) {
128+
return firstbitlow(p0);
129+
}
130+
131+
// CHECK-LABEL: test_firstbitlow_long
132+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
133+
uint test_firstbitlow_long(int64_t p0) {
134+
return firstbitlow(p0);
135+
}
136+
137+
// CHECK-LABEL: test_firstbitlow_long2
138+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
139+
uint2 test_firstbitlow_long2(int64_t2 p0) {
140+
return firstbitlow(p0);
141+
}
142+
143+
// CHECK-LABEL: test_firstbitlow_long3
144+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
145+
uint3 test_firstbitlow_long3(int64_t3 p0) {
146+
return firstbitlow(p0);
147+
}
148+
149+
// CHECK-LABEL: test_firstbitlow_long4
150+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
151+
uint4 test_firstbitlow_long4(int64_t4 p0) {
152+
return firstbitlow(p0);
153+
}

clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,10 @@ double test_int_builtin(double p0) {
1717

1818
double2 test_int_builtin_2(double2 p0) {
1919
return __builtin_hlsl_elementwise_firstbithigh(p0);
20-
// expected-error@-1 {{1st argument must be a vector of integers
21-
// (was 'double2' (aka 'vector<double, 2>'))}}
20+
// expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
2221
}
2322

2423
float test_int_builtin_3(float p0) {
2524
return __builtin_hlsl_elementwise_firstbithigh(p0);
26-
// expected-error@-1 {{1st argument must be a vector of integers
27-
// (was 'float')}}
25+
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
2826
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected
2+
3+
int test_too_few_arg() {
4+
return firstbitlow();
5+
// expected-error@-1 {{no matching function for call to 'firstbitlow'}}
6+
}
7+
8+
int test_too_many_arg(int p0) {
9+
return firstbitlow(p0, p0);
10+
// expected-error@-1 {{no matching function for call to 'firstbitlow'}}
11+
}
12+
13+
double test_int_builtin(double p0) {
14+
return firstbitlow(p0);
15+
// expected-error@-1 {{call to 'firstbitlow' is ambiguous}}
16+
}
17+
18+
double2 test_int_builtin_2(double2 p0) {
19+
return __builtin_hlsl_elementwise_firstbitlow(p0);
20+
// expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
21+
}
22+
23+
float test_int_builtin_3(float p0) {
24+
return __builtin_hlsl_elementwise_firstbitlow(p0);
25+
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
26+
}

llvm/include/llvm/IR/IntrinsicsDirectX.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,4 +103,6 @@ def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>
103103
def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
104104
def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
105105
def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
106+
// TODO: Confirm this signature (i32 result per element, any integer operand)
// against the DirectX (DXIL) specification.
107+
def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
106108
}

llvm/include/llvm/IR/IntrinsicsSPIRV.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ let TargetPrefix = "spv" in {
106106
[IntrNoMem]>;
107107
def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
108108
def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
109+
def int_spv_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
109110

110111
// Read a value from the image buffer. It does not translate directly to a
111112
// single OpImageRead because the result type is not necessarily a 4 element

0 commit comments

Comments
 (0)