Skip to content

Commit d8706d6

Browse files
author
Tim Corringham
committed
[HLSL] Implement the f16tof32() intrinsic
Implement the f16tof32() intrinsic, including DXIL and SPIRV codegen, and associated tests.
1 parent e83eee3 commit d8706d6

File tree

15 files changed

+527
-15
lines changed

15 files changed

+527
-15
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5222,6 +5222,12 @@ def HLSLGetSpirvSpecConstant : LangBuiltin<"HLSL_LANG">, HLSLScalarTemplate {
52225222
let Prototype = "T(unsigned int, T)";
52235223
}
52245224

5225+
def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> {
5226+
let Spellings = ["__builtin_hlsl_elementwise_f16tof32"];
5227+
let Attributes = [NoThrow, Const, CustomTypeChecking];
5228+
let Prototype = "void(...)";
5229+
}
5230+
52255231
// Builtins for XRay.
52265232
def XRayCustomEvent : Builtin {
52275233
let Spellings = ["__xray_customevent"];

clang/lib/CodeGen/CGHLSLBuiltins.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,22 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
560560
/*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
561561
ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
562562
}
563+
case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
564+
Value *Op0 = EmitScalarExpr(E->getArg(0));
565+
llvm::Type *Xty = Op0->getType();
566+
llvm::Type *retType = llvm::Type::getFloatTy(this->getLLVMContext());
567+
if (Xty->isVectorTy()) {
568+
auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
569+
retType = llvm::VectorType::get(
570+
retType, ElementCount::getFixed(XVecTy->getNumElements()));
571+
}
572+
if (!E->getArg(0)->getType()->hasUnsignedIntegerRepresentation())
573+
llvm_unreachable(
574+
"f16tof32 operand must have an unsigned int representation");
575+
return Builder.CreateIntrinsic(
576+
retType, CGM.getHLSLRuntime().getLegacyF16ToF32Intrinsic(),
577+
ArrayRef<Value *>{Op0}, nullptr, "hlsl.f16tof32");
578+
}
563579
case Builtin::BI__builtin_hlsl_elementwise_frac: {
564580
Value *Op0 = EmitScalarExpr(E->getArg(0));
565581
if (!E->getArg(0)->getType()->hasFloatingRepresentation())

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ class CGHLSLRuntime {
9696
flattened_thread_id_in_group)
9797
GENERATE_HLSL_INTRINSIC_FUNCTION(IsInf, isinf)
9898
GENERATE_HLSL_INTRINSIC_FUNCTION(IsNaN, isnan)
99+
GENERATE_HLSL_INTRINSIC_FUNCTION(LegacyF16ToF32, legacyf16tof32)
99100
GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
100101
GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
101102
GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)

clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,6 +1052,27 @@ float3 exp2(float3);
10521052
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
10531053
float4 exp2(float4);
10541054

1055+
//===----------------------------------------------------------------------===//
1056+
// f16tof32 builtins
1057+
//===----------------------------------------------------------------------===//
1058+
1059+
/// \fn float f16tof32(uint x)
1060+
/// \brief Returns the half value stored in the low 16 bits of the uint arg
1061+
/// converted to a float.
1062+
/// \param x The uint containing two half values.
1063+
///
1064+
/// The float value of the half value found in the low 16 bits of the \a x
1065+
/// parameter.
1066+
1067+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
1068+
float f16tof32(uint);
1069+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
1070+
float2 f16tof32(uint2);
1071+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
1072+
float3 f16tof32(uint3);
1073+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
1074+
float4 f16tof32(uint4);
1075+
10551076
//===----------------------------------------------------------------------===//
10561077
// firstbithigh builtins
10571078
//===----------------------------------------------------------------------===//

clang/lib/Sema/SemaHLSL.cpp

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2738,6 +2738,23 @@ static bool CheckUnsignedIntRepresentation(Sema *S, SourceLocation Loc,
27382738
return false;
27392739
}
27402740

2741+
/// Diagnose a builtin call argument whose (element) type is not exactly
/// \p Width bits wide.
///
/// If the argument is a vector, the width of its element type is checked;
/// otherwise the width of the argument type itself is checked.
///
/// \param S          Sema instance used to query type sizes and emit the
///                   diagnostic.
/// \param TheCall    The builtin call being checked.
/// \param ArgOrdinal Zero-based index of the argument of \p TheCall to check.
/// \param Width      The required bit width.
/// \returns true if err_integer_incorrect_bit_count was emitted, false if the
///          argument has the expected width.
static bool CheckExpectedBitWidth(Sema *S, CallExpr *TheCall,
                                  unsigned ArgOrdinal, unsigned Width) {
  // Use the requested argument rather than always inspecting argument 0, so
  // both the width check and the diagnostic location follow ArgOrdinal.
  Expr *Arg = TheCall->getArg(ArgOrdinal);
  QualType ArgTy = Arg->getType();
  if (auto *VTy = ArgTy->getAs<VectorType>())
    ArgTy = VTy->getElementType();
  // ensure arg type has expected bit width
  uint64_t ElementBitCount =
      S->getASTContext().getTypeSizeInChars(ArgTy).getQuantity() * 8;
  if (ElementBitCount != Width) {
    S->Diag(Arg->getBeginLoc(), diag::err_integer_incorrect_bit_count)
        << Width << ElementBitCount;
    return true;
  }
  return false;
}
2757+
27412758
static void SetElementTypeAsReturnType(Sema *S, CallExpr *TheCall,
27422759
QualType ReturnType) {
27432760
auto *VecTyA = TheCall->getArg(0)->getType()->getAs<VectorType>();
@@ -2897,24 +2914,16 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
28972914
CheckUnsignedIntVecRepresentation))
28982915
return true;
28992916

2900-
auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
29012917
// ensure arg integers are 32-bits
2902-
uint64_t ElementBitCount = getASTContext()
2903-
.getTypeSizeInChars(VTy->getElementType())
2904-
.getQuantity() *
2905-
8;
2906-
if (ElementBitCount != 32) {
2907-
SemaRef.Diag(TheCall->getBeginLoc(),
2908-
diag::err_integer_incorrect_bit_count)
2909-
<< 32 << ElementBitCount;
2918+
if (CheckExpectedBitWidth(&SemaRef, TheCall, 0, 32))
29102919
return true;
2911-
}
29122920

29132921
// ensure both args are vectors of total bit size of a multiple of 64
2922+
auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
29142923
int NumElementsArg = VTy->getNumElements();
29152924
if (NumElementsArg != 2 && NumElementsArg != 4) {
29162925
SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vector_incorrect_bit_count)
2917-
<< 1 /*a multiple of*/ << 64 << NumElementsArg * ElementBitCount;
2926+
<< 1 /*a multiple of*/ << 64 << NumElementsArg * 32;
29182927
return true;
29192928
}
29202929

@@ -3230,7 +3239,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
32303239
break;
32313240
}
32323241
// Note these are llvm builtins that we want to catch invalid intrinsic
3233-
// generation. Normal handling of these builitns will occur elsewhere.
3242+
// generation. Normal handling of these builtins will occur elsewhere.
32343243
case Builtin::BI__builtin_elementwise_bitreverse: {
32353244
// does not include a check for number of arguments
32363245
// because that is done previously
@@ -3340,6 +3349,30 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
33403349
}
33413350
break;
33423351
}
3352+
case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
3353+
if (SemaRef.checkArgCount(TheCall, 1))
3354+
return true;
3355+
if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall,
3356+
CheckUnsignedIntRepresentation))
3357+
return true;
3358+
// ensure arg integers are 32 bits
3359+
if (CheckExpectedBitWidth(&SemaRef, TheCall, 0, 32))
3360+
return true;
3361+
// check it wasn't a bool type
3362+
QualType ArgTy = TheCall->getArg(0)->getType();
3363+
if (auto *VTy = ArgTy->getAs<VectorType>())
3364+
ArgTy = VTy->getElementType();
3365+
if (ArgTy->isBooleanType()) {
3366+
SemaRef.Diag(TheCall->getArg(0)->getBeginLoc(),
3367+
diag::err_builtin_invalid_arg_type)
3368+
<< 1 << /* scalar or vector of */ 5 << /* unsigned int */ 3
3369+
<< /* no fp */ 0 << TheCall->getArg(0)->getType();
3370+
return true;
3371+
}
3372+
3373+
SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().FloatTy);
3374+
break;
3375+
}
33433376
}
33443377
return false;
33453378
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
2+
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
3+
// RUN: -o - | FileCheck %s
4+
5+
// CHECK: define hidden noundef nofpclass(nan inf) float
6+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %0)
7+
// CHECK: ret float %hlsl.f16tof32
8+
// CHECK: declare float @llvm.dx.legacyf16tof32.i32(i32)
9+
float test_scalar(uint p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
10+
11+
// CHECK: define hidden noundef nofpclass(nan inf) <2 x float>
12+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %0)
13+
// CHECK: ret <2 x float> %hlsl.f16tof32
14+
// CHECK: declare <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32>)
15+
float2 test_uint2(uint2 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
16+
17+
// CHECK: define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) #0 {
18+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %0)
19+
// CHECK: ret <3 x float> %hlsl.f16tof32
20+
// CHECK: declare <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32>)
21+
float3 test_uint3(uint3 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
22+
23+
// CHECK: define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) #0 {
24+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %0)
25+
// CHECK: ret <4 x float> %hlsl.f16tof32
26+
// CHECK: declare <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32>)
27+
float4 test_uint4(uint4 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
28+
29+
30+
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
2+
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
3+
// RUN: -o - | FileCheck %s
4+
5+
// CHECK: define hidden noundef nofpclass(nan inf) float
6+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %0)
7+
// CHECK: ret float %hlsl.f16tof32
8+
// CHECK: declare float @llvm.dx.legacyf16tof32.i32(i32)
9+
float test_scalar(uint p0) { return f16tof32(p0); }
10+
11+
// CHECK: define hidden noundef nofpclass(nan inf) <2 x float>
12+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %0)
13+
// CHECK: ret <2 x float> %hlsl.f16tof32
14+
// CHECK: declare <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32>)
15+
float2 test_uint2(uint2 p0) { return f16tof32(p0); }
16+
17+
// CHECK: define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) #0 {
18+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %0)
19+
// CHECK: ret <3 x float> %hlsl.f16tof32
20+
// CHECK: declare <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32>)
21+
float3 test_uint3(uint3 p0) { return f16tof32(p0); }
22+
23+
// CHECK: define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) #0 {
24+
// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %0)
25+
// CHECK: ret <4 x float> %hlsl.f16tof32
26+
// CHECK: declare <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32>)
27+
float4 test_uint4(uint4 p0) { return f16tof32(p0); }
28+
29+
30+
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
2+
3+
float builtin_f16tof32_too_few_arg() {
4+
return __builtin_hlsl_elementwise_f16tof32();
5+
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
6+
// expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 0 were provided}}
7+
}
8+
9+
float builtin_f16tof32_too_many_arg(uint p0) {
10+
return __builtin_hlsl_elementwise_f16tof32(p0, p0);
11+
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
12+
// expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 2 were provided}}
13+
}
14+
15+
float builtin_f16tof32_bool(bool p0) {
16+
return __builtin_hlsl_elementwise_f16tof32(p0);
17+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool')}}
18+
}
19+
20+
float builtin_f16tof32_bool4(bool4 p0) {
21+
return __builtin_hlsl_elementwise_f16tof32(p0);
22+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool4' (aka 'vector<bool, 4>')}}
23+
}
24+
25+
float builtin_f16tof32_int16_t(int16_t p0) {
26+
return __builtin_hlsl_elementwise_f16tof32(p0);
27+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int16_t' (aka 'short'))}}
28+
}
29+
30+
float builtin_f16tof32_int16_t(unsigned short p0) {
31+
return __builtin_hlsl_elementwise_f16tof32(p0);
32+
// expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
33+
}
34+
35+
float builtin_f16tof32_int(int p0) {
36+
return __builtin_hlsl_elementwise_f16tof32(p0);
37+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int')}}
38+
}
39+
40+
float builtin_f16tof32_int64_t(long p0) {
41+
return __builtin_hlsl_elementwise_f16tof32(p0);
42+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'long')}}
43+
}
44+
45+
float2 builtin_f16tof32_int2_to_float2_promotion(int2 p0) {
46+
return __builtin_hlsl_elementwise_f16tof32(p0);
47+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int2' (aka 'vector<int, 2>'))}}
48+
}
49+
50+
float builtin_f16tof32_half(half p0) {
51+
return __builtin_hlsl_elementwise_f16tof32(p0);
52+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half')}}
53+
}
54+
55+
float builtin_f16tof32_half4(half4 p0) {
56+
return __builtin_hlsl_elementwise_f16tof32(p0);
57+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half4' (aka 'vector<half, 4>'))}}
58+
}
59+
60+
float builtin_f16tof32_float(float p0) {
61+
return __builtin_hlsl_elementwise_f16tof32(p0);
62+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'float')}}
63+
}
64+
65+
float builtin_f16tof32_double(double p0) {
66+
return __builtin_hlsl_elementwise_f16tof32(p0);
67+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'double')}}
68+
}
69+
70+
float f16tof32_too_few_arg() {
71+
return f16tof32();
72+
// expected-error@-1 {{no matching function for call to 'f16tof32'}}
73+
}
74+
75+
float f16tof32_too_many_arg(uint p0) {
76+
return f16tof32(p0, p0);
77+
// expected-error@-1 {{no matching function for call to 'f16tof32'}}
78+
}
79+
80+
float f16tof32_bool(bool p0) {
81+
return f16tof32(p0);
82+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool')}}
83+
}
84+
85+
float f16tof32_bool3(bool3 p0) {
86+
return f16tof32(p0);
87+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool3' (aka 'vector<bool, 3>')}}
88+
}
89+
90+
91+
float f16tof32_int16_t(int16_t p0) {
92+
return f16tof32(p0);
93+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int16_t' (aka 'short'))}}
94+
}
95+
96+
float f16tof32_int16_t(unsigned short p0) {
97+
return f16tof32(p0);
98+
// expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
99+
}
100+
101+
float f16tof32_int(int p0) {
102+
return f16tof32(p0);
103+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int')}}
104+
}
105+
106+
float f16tof32_int64_t(long p0) {
107+
return f16tof32(p0);
108+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'long')}}
109+
}
110+
111+
float2 f16tof32_int2_to_float2_promotion(int3 p0) {
112+
return f16tof32(p0);
113+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int3' (aka 'vector<int, 3>'))}}
114+
}
115+
116+
float f16tof32_half(half p0) {
117+
return f16tof32(p0);
118+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half')}}
119+
}
120+
121+
float f16tof32_half2(half2 p0) {
122+
return f16tof32(p0);
123+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half2' (aka 'vector<half, 2>'))}}
124+
}
125+
126+
float f16tof32_float(float p0) {
127+
return f16tof32(p0);
128+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'float')}}
129+
}
130+
131+
float f16tof32_double(double p0) {
132+
return f16tof32(p0);
133+
// expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'double')}}
134+
}

llvm/include/llvm/IR/IntrinsicsDirectX.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,9 @@ def int_dx_isinf : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1
140140
def int_dx_isnan : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
141141
[llvm_anyfloat_ty], [IntrNoMem]>;
142142

143+
def int_dx_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>],
144+
[llvm_anyint_ty], [IntrNoMem]>;
145+
143146
def int_dx_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
144147
[IntrNoMem]>;
145148

llvm/include/llvm/IR/IntrinsicsSPIRV.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,4 +198,8 @@ def int_spv_resource_nonuniformindex
198198
def int_spv_generic_cast_to_ptr_explicit
199199
: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [generic_ptr_ty],
200200
[IntrNoMem, NoUndef<RetIndex>]>;
201+
202+
def int_spv_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>],
203+
[llvm_anyint_ty], [IntrNoMem]>;
204+
201205
}

0 commit comments

Comments
 (0)