Skip to content

Commit 16661b5

Browse files
authored
[DirectX] Add isinf f16 emulation for SM6.8 and lower (llvm#156932)
Fixes llvm#156068.
- We needed to add a new sub-arch to the target triple so we can test that emulation does not happen when targeting SM6.9.
- The HLSL toolchain needed to be updated to handle the conversion of strings to enums for the new sub-arch.
- The emulation is done in DXILIntrinsicExpansion.cpp and needs to be able to convert both llvm.is.fpclass and llvm.dx.isinf to the proper emulation.
- Test updates in TargetParser/TripleTest.cpp, isinf.ll, is_fpclass.ll, and DXCModeTest.cpp.
1 parent 1e5ca97 commit 16661b5

File tree

8 files changed

+126
-14
lines changed

8 files changed

+126
-14
lines changed

clang/lib/Driver/ToolChains/HLSL.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,9 @@ std::optional<std::string> tryParseProfile(StringRef Profile) {
132132
case 8:
133133
SubArch = llvm::Triple::DXILSubArch_v1_8;
134134
break;
135+
case 9:
136+
SubArch = llvm::Triple::DXILSubArch_v1_9;
137+
break;
135138
case OfflineLibMinor:
136139
// Always consider minor version x as the latest supported DXIL version
137140
SubArch = llvm::Triple::LatestDXILSubArch;

clang/unittests/Driver/DXCModeTest.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ TEST(DxcModeTest, TargetProfileValidation) {
8585
InMemoryFileSystem, Diags);
8686
validateTargetProfile("-Tcs_6_8", "dxilv1.8--shadermodel6.8-compute",
8787
InMemoryFileSystem, Diags);
88-
validateTargetProfile("-Tlib_6_x", "dxilv1.8--shadermodel6.15-library",
88+
validateTargetProfile("-Tlib_6_x", "dxilv1.9--shadermodel6.15-library",
8989
InMemoryFileSystem, Diags);
9090

9191
// Invalid tests.

llvm/include/llvm/TargetParser/Triple.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,8 @@ class Triple {
180180
DXILSubArch_v1_6,
181181
DXILSubArch_v1_7,
182182
DXILSubArch_v1_8,
183-
LatestDXILSubArch = DXILSubArch_v1_8,
183+
DXILSubArch_v1_9,
184+
LatestDXILSubArch = DXILSubArch_v1_9,
184185
};
185186
enum VendorType {
186187
UnknownVendor,

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,41 @@ static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy,
5151
return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
5252
}
5353

54+
/// Emulate an "is infinity" test on 16-bit floats using integer compares.
///
/// \param Orig the call being expanded; its first operand is the value tested.
/// \returns an i1 (or vector of i1) that is true where the operand's bits equal
/// the half +infinity or -infinity encoding, or nullptr when no emulation is
/// produced: either the module targets DXIL 1.9 or newer, or the operand's
/// scalar type is not half.
static Value *expand16BitIsInf(CallInst *Orig) {
55+
Module *M = Orig->getModule();
56+
// DXIL 1.9 and newer need no emulation; bail out and leave the call as is
// (presumably for DXIL op lowering to handle -- confirm against the caller).
if (M->getTargetTriple().getDXILVersion() >= VersionTuple(1, 9))
57+
return nullptr;
58+
59+
Value *Val = Orig->getOperand(0);
60+
Type *ValTy = Val->getType();
61+
// Only half (scalar, or vector of half) is emulated here.
if (!ValTy->getScalarType()->isHalfTy())
62+
return nullptr;
63+
64+
IRBuilder<> Builder(Orig);
65+
Type *IType = Type::getInt16Ty(M->getContext());
66+
// 0x7c00 is the half bit pattern for +infinity (sign clear, exponent all
// ones, zero mantissa). Vector operands get a splat of matching width.
Constant *PosInf =
67+
ValTy->isVectorTy()
68+
? ConstantVector::getSplat(
69+
ElementCount::getFixed(
70+
cast<FixedVectorType>(ValTy)->getNumElements()),
71+
ConstantInt::get(IType, 0x7c00))
72+
: ConstantInt::get(IType, 0x7c00);
73+
74+
// 0xfc00 is the same pattern with the sign bit set, i.e. -infinity.
Constant *NegInf =
75+
ValTy->isVectorTy()
76+
? ConstantVector::getSplat(
77+
ElementCount::getFixed(
78+
cast<FixedVectorType>(ValTy)->getNumElements()),
79+
ConstantInt::get(IType, 0xfc00))
80+
: ConstantInt::get(IType, 0xfc00);
81+
82+
// Reinterpret the half bits as i16 (or a vector of i16) and compare them
// against both infinity encodings; the result is true if either matches.
Value *IVal = Builder.CreateBitCast(Val, PosInf->getType());
83+
Value *B1 = Builder.CreateICmpEQ(IVal, PosInf);
84+
Value *B2 = Builder.CreateICmpEQ(IVal, NegInf);
85+
Value *B3 = Builder.CreateOr(B1, B2);
86+
return B3;
87+
}
88+
5489
static bool isIntrinsicExpansion(Function &F) {
5590
switch (F.getIntrinsicID()) {
5691
case Intrinsic::abs:
@@ -68,6 +103,7 @@ static bool isIntrinsicExpansion(Function &F) {
68103
case Intrinsic::dx_sclamp:
69104
case Intrinsic::dx_nclamp:
70105
case Intrinsic::dx_degrees:
106+
case Intrinsic::dx_isinf:
71107
case Intrinsic::dx_lerp:
72108
case Intrinsic::dx_normalize:
73109
case Intrinsic::dx_fdot:
@@ -301,9 +337,10 @@ static Value *expandIsFPClass(CallInst *Orig) {
301337
auto *TCI = dyn_cast<ConstantInt>(T);
302338

303339
// These FPClassTest cases have DXIL opcodes, so they will be handled in
304-
// DXIL Op Lowering instead.
340+
// DXIL Op Lowering instead for all non f16 cases.
305341
switch (TCI->getZExtValue()) {
306342
case FPClassTest::fcInf:
343+
return expand16BitIsInf(Orig);
307344
case FPClassTest::fcNan:
308345
case FPClassTest::fcNormal:
309346
case FPClassTest::fcFinite:
@@ -873,6 +910,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
873910
case Intrinsic::dx_degrees:
874911
Result = expandDegreesIntrinsic(Orig);
875912
break;
913+
case Intrinsic::dx_isinf:
914+
Result = expand16BitIsInf(Orig);
915+
break;
876916
case Intrinsic::dx_lerp:
877917
Result = expandLerpIntrinsic(Orig);
878918
break;

llvm/lib/TargetParser/Triple.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ StringRef Triple::getArchName(ArchType Kind, SubArchType SubArch) {
158158
return "dxilv1.7";
159159
case Triple::DXILSubArch_v1_8:
160160
return "dxilv1.8";
161+
case Triple::DXILSubArch_v1_9:
162+
return "dxilv1.9";
161163
default:
162164
break;
163165
}
@@ -650,6 +652,8 @@ static Triple::ArchType parseArch(StringRef ArchName) {
650652
.Cases("dxil", "dxilv1.0", "dxilv1.1", "dxilv1.2", "dxilv1.3",
651653
"dxilv1.4", "dxilv1.5", "dxilv1.6", "dxilv1.7",
652654
"dxilv1.8", Triple::dxil)
655+
// Note: Cases has max limit of 10.
656+
.Case("dxilv1.9", Triple::dxil)
653657
.Case("xtensa", Triple::xtensa)
654658
.Default(Triple::UnknownArch);
655659

@@ -842,6 +846,7 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
842846
.EndsWith("v1.6", Triple::DXILSubArch_v1_6)
843847
.EndsWith("v1.7", Triple::DXILSubArch_v1_7)
844848
.EndsWith("v1.8", Triple::DXILSubArch_v1_8)
849+
.EndsWith("v1.9", Triple::DXILSubArch_v1_9)
845850
.Default(Triple::NoSubArch);
846851

847852
StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName);
@@ -1111,7 +1116,7 @@ static StringRef getDXILArchNameFromShaderModel(StringRef ShaderModelStr) {
11111116
VersionTuple Ver =
11121117
parseVersionFromName(ShaderModelStr.drop_front(strlen("shadermodel")));
11131118
// Default DXIL minor version when Shader Model version is anything other
1114-
// than 6.[0...8] or 6.x (which translates to latest current SM version)
1119+
// than 6.[0...9] or 6.x (which translates to latest current SM version)
11151120
const unsigned SMMajor = 6;
11161121
if (!Ver.empty()) {
11171122
if (Ver.getMajor() == SMMajor) {
@@ -1135,6 +1140,8 @@ static StringRef getDXILArchNameFromShaderModel(StringRef ShaderModelStr) {
11351140
return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_7);
11361141
case 8:
11371142
return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_8);
1143+
case 9:
1144+
return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_9);
11381145
default:
11391146
report_fatal_error("Unsupported Shader Model version", false);
11401147
}

llvm/test/CodeGen/DirectX/is_fpclass.ll

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
1+
; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s --check-prefixes=CHECK,SM69CHECK
2+
; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.8-library %s | FileCheck %s --check-prefixes=CHECK,SMOLDCHECK
33

44

55
define noundef i1 @isnegzero(float noundef %a) {
@@ -75,6 +75,23 @@ entry:
7575
ret i1 %0
7676
}
7777

78+
define noundef i1 @isinfh(half noundef %a) {
79+
; CHECK-LABEL: define noundef i1 @isinfh(
80+
; CHECK-SAME: half noundef [[A:%.*]]) {
81+
; CHECK-NEXT: [[ENTRY:.*:]]
82+
; SM69CHECK-NEXT: [[ISINF:%.*]] = call i1 @dx.op.isSpecialFloat.f16(i32 9, half [[A]]) #[[ATTR0]]
83+
; SMOLDCHECK-NEXT: [[BITCAST:%.*]] = bitcast half %a to i16
84+
; SMOLDCHECK-NEXT: [[CMPHIGH:%.*]] = icmp eq i16 [[BITCAST]], 31744
85+
; SMOLDCHECK-NEXT: [[CMPLOW:%.*]] = icmp eq i16 [[BITCAST]], -1024
86+
; SMOLDCHECK-NEXT: [[OR:%.*]] = or i1 [[CMPHIGH]], [[CMPLOW]]
87+
; SMOLDCHECK-NEXT: ret i1 [[OR]]
88+
; SM69CHECK-NEXT: ret i1 [[ISINF]]
89+
;
90+
entry:
91+
%0 = call i1 @llvm.is.fpclass.f16(half %a, i32 516)
92+
ret i1 %0
93+
}
94+
7895
define noundef <2 x i1> @isinfv2(<2 x float> noundef %a) {
7996
; CHECK-LABEL: define noundef <2 x i1> @isinfv2(
8097
; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {

llvm/test/CodeGen/DirectX/isinf.ll

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
1+
; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s --check-prefixes=CHECK,SM69CHECK
2+
; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.8-library %s | FileCheck %s --check-prefixes=CHECK,SMOLDCHECK
23

34
; Make sure dxil operation function calls for isinf are generated for float and half.
45

@@ -11,17 +12,47 @@ entry:
1112

1213
define noundef i1 @isinf_half(half noundef %a) {
1314
entry:
14-
; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) #[[#ATTR]]
15+
; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) #[[#ATTR]]
16+
; SMOLDCHECK: [[BITCAST:%.*]] = bitcast half %a to i16
17+
; SMOLDCHECK: [[CMPHIGH:%.*]] = icmp eq i16 [[BITCAST]], 31744
18+
; SMOLDCHECK: [[CMPLOW:%.*]] = icmp eq i16 [[BITCAST]], -1024
19+
; SMOLDCHECK: [[OR:%.*]] = or i1 [[CMPHIGH]], [[CMPLOW]]
1520
%dx.isinf = call i1 @llvm.dx.isinf.f16(half %a)
1621
ret i1 %dx.isinf
1722
}
1823

1924
define noundef <4 x i1> @isinf_half4(<4 x half> noundef %p0) {
2025
entry:
21-
; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
22-
; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
23-
; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
24-
; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
26+
; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
27+
; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
28+
; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
29+
; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
30+
31+
; SMOLDCHECK: [[ee0:%.*]] = extractelement <4 x half> %p0, i64 0
32+
; SMOLDCHECK: [[BITCAST0:%.*]] = bitcast half [[ee0]] to i16
33+
; SMOLDCHECK: [[ee1:%.*]] = extractelement <4 x half> %p0, i64 1
34+
; SMOLDCHECK: [[BITCAST1:%.*]] = bitcast half [[ee1]] to i16
35+
; SMOLDCHECK:[[ee2:%.*]] = extractelement <4 x half> %p0, i64 2
36+
; SMOLDCHECK: [[BITCAST2:%.*]] = bitcast half [[ee2]] to i16
37+
; SMOLDCHECK: [[ee3:%.*]] = extractelement <4 x half> %p0, i64 3
38+
; SMOLDCHECK: [[BITCAST3:%.*]] = bitcast half [[ee3]] to i16
39+
; SMOLDCHECK: [[ICMPHIGH0:%.*]] = icmp eq i16 [[BITCAST0]], 31744
40+
; SMOLDCHECK: [[ICMPHIGH1:%.*]] = icmp eq i16 [[BITCAST1]], 31744
41+
; SMOLDCHECK: [[ICMPHIGH2:%.*]] = icmp eq i16 [[BITCAST2]], 31744
42+
; SMOLDCHECK: [[ICMPHIGH3:%.*]] = icmp eq i16 [[BITCAST3]], 31744
43+
; SMOLDCHECK: [[ICMPLOW0:%.*]] = icmp eq i16 [[BITCAST0]], -1024
44+
; SMOLDCHECK: [[ICMPLOW1:%.*]] = icmp eq i16 [[BITCAST1]], -1024
45+
; SMOLDCHECK: [[ICMPLOW2:%.*]] = icmp eq i16 [[BITCAST2]], -1024
46+
; SMOLDCHECK: [[ICMPLOW3:%.*]] = icmp eq i16 [[BITCAST3]], -1024
47+
; SMOLDCHECK: [[OR0:%.*]] = or i1 [[ICMPHIGH0]], [[ICMPLOW0]]
48+
; SMOLDCHECK: [[OR1:%.*]] = or i1 [[ICMPHIGH1]], [[ICMPLOW1]]
49+
; SMOLDCHECK: [[OR2:%.*]] = or i1 [[ICMPHIGH2]], [[ICMPLOW2]]
50+
; SMOLDCHECK: [[OR3:%.*]] = or i1 [[ICMPHIGH3]], [[ICMPLOW3]]
51+
; SMOLDCHECK: %.upto019 = insertelement <4 x i1> poison, i1 [[OR0]], i64 0
52+
; SMOLDCHECK: %.upto120 = insertelement <4 x i1> %.upto019, i1 [[OR1]], i64 1
53+
; SMOLDCHECK: %.upto221 = insertelement <4 x i1> %.upto120, i1 [[OR2]], i64 2
54+
; SMOLDCHECK: %0 = insertelement <4 x i1> %.upto221, i1 [[OR3]], i64 3
55+
2556
%hlsl.isinf = call <4 x i1> @llvm.dx.isinf.v4f16(<4 x half> %p0)
2657
ret <4 x i1> %hlsl.isinf
2758
}

llvm/unittests/TargetParser/TripleTest.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,13 @@ TEST(TripleTest, ParsedIDs) {
553553
EXPECT_EQ(Triple::ShaderModel, T.getOS());
554554
EXPECT_EQ(VersionTuple(1, 8), T.getDXILVersion());
555555

556+
T = Triple("dxilv1.9-unknown-shadermodel6.15-library");
557+
EXPECT_EQ(Triple::dxil, T.getArch());
558+
EXPECT_EQ(Triple::DXILSubArch_v1_9, T.getSubArch());
559+
EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
560+
EXPECT_EQ(Triple::ShaderModel, T.getOS());
561+
EXPECT_EQ(VersionTuple(1, 9), T.getDXILVersion());
562+
556563
T = Triple("x86_64-unknown-fuchsia");
557564
EXPECT_EQ(Triple::x86_64, T.getArch());
558565
EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
@@ -1270,6 +1277,12 @@ TEST(TripleTest, ParsedIDs) {
12701277
EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
12711278
EXPECT_EQ(Triple::UnknownOS, T.getOS());
12721279

1280+
T = Triple("dxilv1.9-unknown-unknown");
1281+
EXPECT_EQ(Triple::dxil, T.getArch());
1282+
EXPECT_EQ(Triple::DXILSubArch_v1_9, T.getSubArch());
1283+
EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
1284+
EXPECT_EQ(Triple::UnknownOS, T.getOS());
1285+
12731286
// Check specification of unknown SubArch results in
12741287
// unknown architecture.
12751288
T = Triple("dxilv1.999-unknown-unknown");
@@ -3226,9 +3239,9 @@ TEST(TripleTest, DXILNormaizeWithVersion) {
32263239
Triple::normalize("dxil--shadermodel6.0"));
32273240
EXPECT_EQ("dxilv1.1-unknown-shadermodel6.1-library",
32283241
Triple::normalize("dxil-shadermodel6.1-unknown-library"));
3229-
EXPECT_EQ("dxilv1.8-unknown-shadermodel6.x-unknown",
3242+
EXPECT_EQ("dxilv1.9-unknown-shadermodel6.x-unknown",
32303243
Triple::normalize("dxil-unknown-shadermodel6.x-unknown"));
3231-
EXPECT_EQ("dxilv1.8-unknown-shadermodel6.x-unknown",
3244+
EXPECT_EQ("dxilv1.9-unknown-shadermodel6.x-unknown",
32323245
Triple::normalize("dxil-unknown-shadermodel6.x-unknown"));
32333246
EXPECT_EQ("dxil-unknown-unknown-unknown", Triple::normalize("dxil---"));
32343247
EXPECT_EQ("dxilv1.0-pc-shadermodel5.0-compute",

0 commit comments

Comments
 (0)