Skip to content

Commit 89ec96b

Browse files
tcorringhamTim Corringham
andauthored
[HLSL] Implement the f16tof32() intrinsic (llvm#165860)
Implement the f16tof32() intrinsic, including DXIL and SPIRV codegen, and associated tests. Fixes llvm#99112 --------- Co-authored-by: Tim Corringham <[email protected]>
1 parent 2dc0fa1 commit 89ec96b

File tree

14 files changed

+419
-15
lines changed

14 files changed

+419
-15
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5235,6 +5235,12 @@ def HLSLGetSpirvSpecConstant : LangBuiltin<"HLSL_LANG">, HLSLScalarTemplate {
52355235
let Prototype = "T(unsigned int, T)";
52365236
}
52375237

5238+
// HLSL-only builtin spelled __builtin_hlsl_elementwise_f16tof32.
// CustomTypeChecking is set and the prototype is the variadic placeholder
// "void(...)"; the SemaHLSL case for this builtin validates the single
// argument and sets the call's result type.
def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> {
5239+
let Spellings = ["__builtin_hlsl_elementwise_f16tof32"];
5240+
let Attributes = [NoThrow, Const, CustomTypeChecking];
5241+
let Prototype = "void(...)";
5242+
}
5243+
52385244
// Builtins for XRay.
52395245
def XRayCustomEvent : Builtin {
52405246
let Spellings = ["__xray_customevent"];

clang/lib/CodeGen/CGHLSLBuiltins.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,57 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
160160
return LastInst;
161161
}
162162

163+
/// Emit IR for a call to the HLSL f16tof32 builtin.
///
/// The call's single argument is a scalar or vector with an unsigned integer
/// representation. The result is a float, or a float vector with the same
/// number of elements as the argument.
///
/// - DXIL: one call to the dx_legacyf16tof32 intrinsic on the whole operand,
///   named "hlsl.f16tof32".
/// - SPIR-V: one spv_unpackhalf2x16 call per input element; only element 0
///   of each two-float result is kept.
///
/// \param CGF Code generator used to emit the operand and the intrinsics.
/// \param E   The builtin call expression.
/// \returns The value holding the converted float(s).
///
/// Any other target, or an operand without an unsigned integer
/// representation, reaches llvm_unreachable.
static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
                                        const CallExpr *E) {
  const Expr *Arg = E->getArg(0);
  QualType Op0Ty = Arg->getType();

  // Reject an unexpected operand type before emitting any IR for it.
  if (!Op0Ty->hasUnsignedIntegerRepresentation())
    llvm_unreachable(
        "f16tof32 operand must have an unsigned int representation");

  Value *Op0 = CGF.EmitScalarExpr(Arg);

  // NumElements stays 0 for a scalar operand; the SPIR-V path below uses that
  // to tell the scalar and vector cases apart.
  uint64_t NumElements = 0;
  llvm::Type *ResType = CGF.FloatTy;
  if (Op0->getType()->isVectorTy()) {
    NumElements = Op0Ty->castAs<clang::VectorType>()->getNumElements();
    ResType =
        llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
  }

  if (CGF.CGM.getTriple().isDXIL())
    return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf16tof32,
                                       ArrayRef<Value *>{Op0}, nullptr,
                                       "hlsl.f16tof32");

  if (CGF.CGM.getTriple().isSPIRV()) {
    // We use the SPIRV UnpackHalf2x16 operation to avoid the need for the
    // Int16 and Float16 capabilities.
    auto *UnpackType =
        llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));

    // Unpack one 32-bit value into two floats and keep the first element.
    auto UnpackFirstHalf = [&](Value *Packed) -> Value * {
      Value *Unpack = CGF.Builder.CreateIntrinsic(
          UnpackType, Intrinsic::spv_unpackhalf2x16,
          ArrayRef<Value *>{Packed});
      return CGF.Builder.CreateExtractElement(Unpack,
                                              static_cast<uint64_t>(0));
    };

    // A scalar input needs a single unpack.
    if (NumElements == 0)
      return UnpackFirstHalf(Op0);

    // A vector input: build a congruent output vector by converting each
    // input element in turn.
    Value *Result = PoisonValue::get(ResType);
    for (uint64_t I = 0; I < NumElements; ++I) {
      Value *InVal = CGF.Builder.CreateExtractElement(Op0, I);
      Value *Res = UnpackFirstHalf(InVal);
      Result = CGF.Builder.CreateInsertElement(Result, Res, I);
    }
    return Result;
  }

  llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
}
213+
163214
static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
164215
LValue &Stride) {
165216
// Figure out the stride of the buffer elements from the handle type.
@@ -579,6 +630,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
579630
/*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
580631
ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
581632
}
633+
case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
634+
return handleElementwiseF16ToF32(*this, E);
635+
}
582636
case Builtin::BI__builtin_hlsl_elementwise_frac: {
583637
Value *Op0 = EmitScalarExpr(E->getArg(0));
584638
if (!E->getArg(0)->getType()->hasFloatingRepresentation())

clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,6 +1052,27 @@ float3 exp2(float3);
10521052
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
10531053
float4 exp2(float4);
10541054

1055+
//===----------------------------------------------------------------------===//
1056+
// f16tof32 builtins
1057+
//===----------------------------------------------------------------------===//
1058+
1059+
/// \fn float f16tof32(uint x)
1060+
/// \brief Returns the half value stored in the low 16 bits of the uint arg
1061+
/// converted to a float.
1062+
/// \param x The uint containing two half values.
1063+
///
1064+
/// The float value of the half value found in the low 16 bits of the \a x
1065+
/// parameter.
1066+
1067+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
1068+
float f16tof32(uint);
1069+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
1070+
float2 f16tof32(uint2);
1071+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
1072+
float3 f16tof32(uint3);
1073+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
1074+
float4 f16tof32(uint4);
1075+
10551076
//===----------------------------------------------------------------------===//
10561077
// firstbithigh builtins
10571078
//===----------------------------------------------------------------------===//

clang/lib/Sema/SemaHLSL.cpp

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2802,6 +2802,23 @@ static bool CheckUnsignedIntRepresentation(Sema *S, SourceLocation Loc,
28022802
return false;
28032803
}
28042804

2805+
/// Diagnose a builtin call argument whose element type is not exactly
/// \p Width bits wide.
///
/// For a vector argument the width of the vector's element type is checked;
/// otherwise the width of the argument's own type is checked.
///
/// \param S          Sema instance used to query type sizes and emit the
///                   diagnostic.
/// \param TheCall    The builtin call being checked.
/// \param ArgOrdinal Index passed to TheCall->getArg() to select the argument
///                   (callers visible here pass 0 for the first argument).
/// \param Width      Required element width in bits.
/// \returns true if err_integer_incorrect_bit_count was emitted, false if the
///          width matches.
static bool CheckExpectedBitWidth(Sema *S, CallExpr *TheCall,
                                  unsigned ArgOrdinal, unsigned Width) {
  // Inspect the argument the caller asked for rather than always argument 0.
  const Expr *Arg = TheCall->getArg(ArgOrdinal);

  QualType ElemTy = Arg->getType();
  if (const auto *VTy = ElemTy->getAs<VectorType>())
    ElemTy = VTy->getElementType();

  // Ensure the (element) type has the expected bit width.
  uint64_t ElementBitCount =
      S->getASTContext().getTypeSizeInChars(ElemTy).getQuantity() * 8;
  if (ElementBitCount == Width)
    return false;

  S->Diag(Arg->getBeginLoc(), diag::err_integer_incorrect_bit_count)
      << Width << ElementBitCount;
  return true;
}
2821+
28052822
static void SetElementTypeAsReturnType(Sema *S, CallExpr *TheCall,
28062823
QualType ReturnType) {
28072824
auto *VecTyA = TheCall->getArg(0)->getType()->getAs<VectorType>();
@@ -2961,24 +2978,16 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
29612978
CheckUnsignedIntVecRepresentation))
29622979
return true;
29632980

2964-
auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
29652981
// ensure arg integers are 32-bits
2966-
uint64_t ElementBitCount = getASTContext()
2967-
.getTypeSizeInChars(VTy->getElementType())
2968-
.getQuantity() *
2969-
8;
2970-
if (ElementBitCount != 32) {
2971-
SemaRef.Diag(TheCall->getBeginLoc(),
2972-
diag::err_integer_incorrect_bit_count)
2973-
<< 32 << ElementBitCount;
2982+
if (CheckExpectedBitWidth(&SemaRef, TheCall, 0, 32))
29742983
return true;
2975-
}
29762984

29772985
// ensure both args are vectors of total bit size of a multiple of 64
2986+
auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
29782987
int NumElementsArg = VTy->getNumElements();
29792988
if (NumElementsArg != 2 && NumElementsArg != 4) {
29802989
SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vector_incorrect_bit_count)
2981-
<< 1 /*a multiple of*/ << 64 << NumElementsArg * ElementBitCount;
2990+
<< 1 /*a multiple of*/ << 64 << NumElementsArg * 32;
29822991
return true;
29832992
}
29842993

@@ -3295,7 +3304,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
32953304
break;
32963305
}
32973306
// Note these are llvm builtins that we want to catch invalid intrinsic
3298-
// generation. Normal handling of these builitns will occur elsewhere.
3307+
// generation. Normal handling of these builtins will occur elsewhere.
32993308
case Builtin::BI__builtin_elementwise_bitreverse: {
33003309
// does not include a check for number of arguments
33013310
// because that is done previously
@@ -3405,6 +3414,30 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
34053414
}
34063415
break;
34073416
}
3417+
case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
3418+
if (SemaRef.checkArgCount(TheCall, 1))
3419+
return true;
3420+
if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall,
3421+
CheckUnsignedIntRepresentation))
3422+
return true;
3423+
// ensure arg integers are 32 bits
3424+
if (CheckExpectedBitWidth(&SemaRef, TheCall, 0, 32))
3425+
return true;
3426+
// check it wasn't a bool type
3427+
QualType ArgTy = TheCall->getArg(0)->getType();
3428+
if (auto *VTy = ArgTy->getAs<VectorType>())
3429+
ArgTy = VTy->getElementType();
3430+
if (ArgTy->isBooleanType()) {
3431+
SemaRef.Diag(TheCall->getArg(0)->getBeginLoc(),
3432+
diag::err_builtin_invalid_arg_type)
3433+
<< 1 << /* scalar or vector of */ 5 << /* unsigned int */ 3
3434+
<< /* no fp */ 0 << TheCall->getArg(0)->getType();
3435+
return true;
3436+
}
3437+
3438+
SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().FloatTy);
3439+
break;
3440+
}
34083441
}
34093442
return false;
34103443
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
2+
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
3+
// RUN: -o - | FileCheck %s
4+
5+
// CHECK: define hidden noundef nofpclass(nan inf) float
6+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %0)
7+
// CHECK: ret float %hlsl.f16tof32
8+
// CHECK: declare float @llvm.dx.legacyf16tof32.i32(i32)
9+
float test_scalar(uint p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
10+
11+
// CHECK: define hidden noundef nofpclass(nan inf) <2 x float>
12+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %0)
13+
// CHECK: ret <2 x float> %hlsl.f16tof32
14+
// CHECK: declare <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32>)
15+
float2 test_uint2(uint2 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
16+
17+
// CHECK: define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) #0 {
18+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %0)
19+
// CHECK: ret <3 x float> %hlsl.f16tof32
20+
// CHECK: declare <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32>)
21+
float3 test_uint3(uint3 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
22+
23+
// CHECK: define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) #0 {
24+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %0)
25+
// CHECK: ret <4 x float> %hlsl.f16tof32
26+
// CHECK: declare <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32>)
27+
float4 test_uint4(uint4 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
28+
29+
30+
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
2+
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
3+
// RUN: -o - | FileCheck %s
4+
5+
// CHECK: define hidden noundef nofpclass(nan inf) float
6+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %0)
7+
// CHECK: ret float %hlsl.f16tof32
8+
// CHECK: declare float @llvm.dx.legacyf16tof32.i32(i32)
9+
float test_scalar(uint p0) { return f16tof32(p0); }
10+
11+
// CHECK: define hidden noundef nofpclass(nan inf) <2 x float>
12+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %0)
13+
// CHECK: ret <2 x float> %hlsl.f16tof32
14+
// CHECK: declare <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32>)
15+
float2 test_uint2(uint2 p0) { return f16tof32(p0); }
16+
17+
// CHECK: define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) #0 {
18+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %0)
19+
// CHECK: ret <3 x float> %hlsl.f16tof32
20+
// CHECK: declare <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32>)
21+
float3 test_uint3(uint3 p0) { return f16tof32(p0); }
22+
23+
// CHECK: define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) #0 {
24+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %0)
25+
// CHECK: ret <4 x float> %hlsl.f16tof32
26+
// CHECK: declare <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32>)
27+
float4 test_uint4(uint4 p0) { return f16tof32(p0); }
28+
29+
30+
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-int16-type -emit-llvm-only -disable-llvm-passes -verify
2+
3+
float builtin_f16tof32_too_few_arg() {
4+
return __builtin_hlsl_elementwise_f16tof32();
5+
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
6+
// expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 0 were provided}}
7+
}
8+
9+
float builtin_f16tof32_too_many_arg(uint p0) {
10+
return __builtin_hlsl_elementwise_f16tof32(p0, p0);
11+
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
12+
// expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 2 were provided}}
13+
}
14+
15+
float builtin_f16tof32_bool(bool p0) {
16+
return __builtin_hlsl_elementwise_f16tof32(p0);
17+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool')}}
18+
}
19+
20+
float builtin_f16tof32_bool4(bool4 p0) {
21+
return __builtin_hlsl_elementwise_f16tof32(p0);
22+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool4' (aka 'vector<bool, 4>'))}}
23+
}
24+
25+
float builtin_f16tof32_short(short p0) {
26+
return __builtin_hlsl_elementwise_f16tof32(p0);
27+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'short')}}
28+
}
29+
30+
float builtin_f16tof32_unsigned_short(unsigned short p0) {
31+
return __builtin_hlsl_elementwise_f16tof32(p0);
32+
// expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
33+
}
34+
35+
float builtin_f16tof32_int(int p0) {
36+
return __builtin_hlsl_elementwise_f16tof32(p0);
37+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int')}}
38+
}
39+
40+
float builtin_f16tof32_int64_t(long p0) {
41+
return __builtin_hlsl_elementwise_f16tof32(p0);
42+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'long')}}
43+
}
44+
45+
float2 builtin_f16tof32_int2_to_float2_promotion(int2 p0) {
46+
return __builtin_hlsl_elementwise_f16tof32(p0);
47+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int2' (aka 'vector<int, 2>'))}}
48+
}
49+
50+
float builtin_f16tof32_half(half p0) {
51+
return __builtin_hlsl_elementwise_f16tof32(p0);
52+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half')}}
53+
}
54+
55+
float builtin_f16tof32_half4(half4 p0) {
56+
return __builtin_hlsl_elementwise_f16tof32(p0);
57+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half4' (aka 'vector<half, 4>'))}}
58+
}
59+
60+
float builtin_f16tof32_float(float p0) {
61+
return __builtin_hlsl_elementwise_f16tof32(p0);
62+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'float')}}
63+
}
64+
65+
float builtin_f16tof32_double(double p0) {
66+
return __builtin_hlsl_elementwise_f16tof32(p0);
67+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'double')}}
68+
}
69+
70+
float f16tof32_too_few_arg() {
71+
return f16tof32();
72+
// expected-error@-1 {{no matching function for call to 'f16tof32'}}
73+
}
74+
75+
float f16tof32_too_many_arg(uint p0) {
76+
return f16tof32(p0, p0);
77+
// expected-error@-1 {{no matching function for call to 'f16tof32'}}
78+
}
79+
80+
float f16tof32_bool(bool p0) {
81+
return f16tof32(p0);
82+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool')}}
83+
}
84+
85+
float f16tof32_bool3(bool3 p0) {
86+
return f16tof32(p0);
87+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool3' (aka 'vector<bool, 3>'))}}
88+
}
89+
90+
91+
float f16tof32_int16_t(short p0) {
92+
return f16tof32(p0);
93+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'short')}}
94+
}
95+
96+
float f16tof32_int16_t(unsigned short p0) {
97+
return f16tof32(p0);
98+
// expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
99+
}
100+
101+
float f16tof32_int(int p0) {
102+
return f16tof32(p0);
103+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int')}}
104+
}
105+
106+
float f16tof32_int64_t(long p0) {
107+
return f16tof32(p0);
108+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'long')}}
109+
}
110+
111+
float2 f16tof32_int2_to_float2_promotion(int3 p0) {
112+
return f16tof32(p0);
113+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int3' (aka 'vector<int, 3>'))}}
114+
}
115+
116+
float f16tof32_half(half p0) {
117+
return f16tof32(p0);
118+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half')}}
119+
}
120+
121+
float f16tof32_half2(half2 p0) {
122+
return f16tof32(p0);
123+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half2' (aka 'vector<half, 2>'))}}
124+
}
125+
126+
float f16tof32_float(float p0) {
127+
return f16tof32(p0);
128+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'float')}}
129+
}
130+
131+
float f16tof32_double(double p0) {
132+
return f16tof32(p0);
133+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'double')}}
134+
}

0 commit comments

Comments
 (0)