Skip to content

Commit 86f5bb5

Browse files
pow2clk, Greg Roth, llvm-beanz, V-FEXrt
authored
[SM6.9] Enable Native Vector Overloads for Derivatives (#7598)
This enables derivative operations on native vectors by allowing native vectors in the temporary convergent functions. HLSL intrinsics that lower to DXIL derivative ops get their parameters marked as convergent by passing them through a temporary convergent function. Previously, this function scalarized vectors, so the results remained scalarized. This change adds native vector overloads of the convergent function and generates them in the convergent pass, which preserves the native vectors all the way through to final DXIL. It also moves the tests for the intrinsics that were scalarized until now to the native vector test locations. fwidth requires a somewhat more involved expansion, while the derivative operations can be tested trivially. Fixes #7343 --------- Co-authored-by: Greg Roth <[email protected]> Co-authored-by: Chris B <[email protected]> Co-authored-by: Ashley Coleman <[email protected]>
1 parent 50764ba commit 86f5bb5

12 files changed

+275
-39
lines changed

lib/DXIL/DxilOperations.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -765,32 +765,32 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
765765
"unary",
766766
Attribute::ReadNone,
767767
1,
768-
{{0x3}},
769-
{{0x0}}}, // Overloads: hf
768+
{{0x403}},
769+
{{0x3}}}, // Overloads: hf<hf
770770
{OC::DerivCoarseY,
771771
"DerivCoarseY",
772772
OCC::Unary,
773773
"unary",
774774
Attribute::ReadNone,
775775
1,
776-
{{0x3}},
777-
{{0x0}}}, // Overloads: hf
776+
{{0x403}},
777+
{{0x3}}}, // Overloads: hf<hf
778778
{OC::DerivFineX,
779779
"DerivFineX",
780780
OCC::Unary,
781781
"unary",
782782
Attribute::ReadNone,
783783
1,
784-
{{0x3}},
785-
{{0x0}}}, // Overloads: hf
784+
{{0x403}},
785+
{{0x3}}}, // Overloads: hf<hf
786786
{OC::DerivFineY,
787787
"DerivFineY",
788788
OCC::Unary,
789789
"unary",
790790
Attribute::ReadNone,
791791
1,
792-
{{0x3}},
793-
{{0x0}}}, // Overloads: hf
792+
{{0x403}},
793+
{{0x3}}}, // Overloads: hf<hf
794794

795795
// Pixel shader
796796
{OC::EvalSnapped,

lib/HLSL/DxilConvergent.cpp

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,18 @@ class DxilConvergentMark : public ModulePass {
3838
public:
3939
static char ID; // Pass identification, replacement for typeid
4040
explicit DxilConvergentMark() : ModulePass(ID) {}
41+
bool SupportsVectors = false;
4142

4243
StringRef getPassName() const override { return "DxilConvergentMark"; }
4344

4445
bool runOnModule(Module &M) override {
45-
if (M.HasHLModule()) {
46-
const ShaderModel *SM = M.GetHLModule().GetShaderModel();
47-
if (!SM->IsPS() && !SM->IsLib() &&
48-
(!SM->IsSM66Plus() || (!SM->IsCS() && !SM->IsMS() && !SM->IsAS())))
49-
return false;
50-
}
46+
const ShaderModel *SM = M.GetOrCreateHLModule().GetShaderModel();
47+
// Can skip if in a shader and version that doesn't support derivatives.
48+
if (!SM->IsPS() && !SM->IsLib() &&
49+
(!SM->IsSM66Plus() || (!SM->IsCS() && !SM->IsMS() && !SM->IsAS())))
50+
return false;
51+
SupportsVectors = SM->IsSM69Plus();
52+
5153
bool bUpdated = false;
5254

5355
for (Function &F : M.functions()) {
@@ -87,7 +89,14 @@ char DxilConvergentMark::ID = 0;
8789

8890
void DxilConvergentMark::MarkConvergent(Value *V, IRBuilder<> &Builder,
8991
Module &M) {
90-
Type *Ty = V->getType()->getScalarType();
92+
Type *Ty = V->getType();
93+
bool NeedVectorExpansion = false;
94+
VectorType *VTy = dyn_cast<VectorType>(Ty);
95+
if (VTy && (!SupportsVectors || VTy->getNumElements() == 1)) {
96+
Ty = Ty->getScalarType();
97+
NeedVectorExpansion = true;
98+
}
99+
91100
// Only work on vector/scalar types.
92101
if (Ty->isAggregateType() || Ty->isPointerTy())
93102
return;
@@ -98,7 +107,8 @@ void DxilConvergentMark::MarkConvergent(Value *V, IRBuilder<> &Builder,
98107
os.flush();
99108
Function *ConvF = cast<Function>(M.getOrInsertFunction(str, FT));
100109
ConvF->addFnAttr(Attribute::AttrKind::Convergent);
101-
if (VectorType *VT = dyn_cast<VectorType>(V->getType())) {
110+
if (NeedVectorExpansion) {
111+
VectorType *VT = cast<VectorType>(V->getType());
102112
Value *ConvV = UndefValue::get(V->getType());
103113
std::vector<ExtractElementInst *> extractList(VT->getNumElements());
104114
for (unsigned i = 0; i < VT->getNumElements(); i++) {

lib/HLSL/HLOperationLower.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6981,17 +6981,15 @@ IntrinsicLower gLowerTable[] = {
69816981
{IntrinsicOp::IOP_countbits, TrivialUnaryOperationRet,
69826982
DXIL::OpCode::Countbits},
69836983
{IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes},
6984-
{IntrinsicOp::IOP_ddx, TrivialUnaryOperationRet,
6984+
{IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX},
6985+
{IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation,
69856986
DXIL::OpCode::DerivCoarseX},
6986-
{IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperationRet,
6987-
DXIL::OpCode::DerivCoarseX},
6988-
{IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperationRet,
6987+
{IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation,
69896988
DXIL::OpCode::DerivFineX},
6990-
{IntrinsicOp::IOP_ddy, TrivialUnaryOperationRet,
6991-
DXIL::OpCode::DerivCoarseY},
6992-
{IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperationRet,
6989+
{IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY},
6990+
{IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation,
69936991
DXIL::OpCode::DerivCoarseY},
6994-
{IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperationRet,
6992+
{IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation,
69956993
DXIL::OpCode::DerivFineY},
69966994
{IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes},
69976995
{IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes},

tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,22 @@ void main() {
334334
// CHECK: fdiv fast <[[NUM]] x float> <float 1.000000e+00, {{.*}}>, [[fvec1]]
335335
fRes += rcp(fVec1);
336336

337+
// CHECK-NOT: extractelement
338+
// CHECK-NOT: insertelement
339+
// CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 83, <[[NUM]] x half> [[hvec1]]) ; DerivCoarseX(value)
340+
// CHECK: call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 6, <[[NUM]] x half> [[tmp]]) ; FAbs(value)
341+
// CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 84, <[[NUM]] x half> [[hvec1]]) ; DerivCoarseY(value)
342+
// CHECK: call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 6, <[[NUM]] x half> [[tmp]]) ; FAbs(value)
343+
hRes += fwidth(hVec1);
344+
345+
// CHECK-NOT: extractelement
346+
// CHECK-NOT: insertelement
347+
// CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 83, <[[NUM]] x float> [[fvec1]]) ; DerivCoarseX(value)
348+
// CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 6, <[[NUM]] x float> [[tmp]]) ; FAbs(value)
349+
// CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 84, <[[NUM]] x float> [[fvec1]]) ; DerivCoarseY(value)
350+
// CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 6, <[[NUM]] x float> [[tmp]]) ; FAbs(value)
351+
fRes += fwidth(fVec1);
352+
337353
vector<uint, NUM> signs = 1;
338354
// CHECK-NOT: extractelement
339355
// CHECK-NOT: insertelement

tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,6 @@
99
// RUN: %dxc -DFUNC=countbits -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
1010
// RUN: %dxc -DFUNC=firstbithigh -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
1111
// RUN: %dxc -DFUNC=firstbitlow -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
12-
// RUN: %dxc -DFUNC=ddx -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
13-
// RUN: %dxc -DFUNC=ddx_coarse -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
14-
// RUN: %dxc -DFUNC=ddx_fine -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
15-
// RUN: %dxc -DFUNC=ddy -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
16-
// RUN: %dxc -DFUNC=ddy_coarse -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
17-
// RUN: %dxc -DFUNC=ddy_fine -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
18-
// RUN: %dxc -DFUNC=fwidth -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
1912
// RUN: %dxc -DFUNC=QuadReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD
2013
// RUN: %dxc -DFUNC=QuadReadAcrossX -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD
2114
// RUN: %dxc -DFUNC=QuadReadAcrossY -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD

tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,20 @@
4141
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=7 %s | FileCheck %s
4242
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=1022 %s | FileCheck %s
4343

44+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ddx -DOP=83 -DNUM=7 %s | FileCheck %s -check-prefixes=CHECK,CONV
45+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ddx -DOP=83 -DNUM=1022 %s | FileCheck %s -check-prefixes=CHECK,CONV
46+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ddx_coarse -DOP=83 -DNUM=7 %s | FileCheck %s -check-prefixes=CHECK,CONV
47+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ddx_coarse -DOP=83 -DNUM=1022 %s | FileCheck %s -check-prefixes=CHECK,CONV
48+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ddx_fine -DOP=85 -DNUM=7 %s | FileCheck %s -check-prefixes=CHECK,CONV
49+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ddx_fine -DOP=85 -DNUM=1022 %s | FileCheck %s -check-prefixes=CHECK,CONV
50+
51+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ddy -DOP=84 -DNUM=7 %s | FileCheck %s -check-prefixes=CHECK,CONV
52+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ddy -DOP=84 -DNUM=1022 %s | FileCheck %s -check-prefixes=CHECK,CONV
53+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ddy_coarse -DOP=84 -DNUM=7 %s | FileCheck %s -check-prefixes=CHECK,CONV
54+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ddy_coarse -DOP=84 -DNUM=1022 %s | FileCheck %s -check-prefixes=CHECK,CONV
55+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ddy_fine -DOP=86 -DNUM=7 %s | FileCheck %s -check-prefixes=CHECK,CONV
56+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ddy_fine -DOP=86 -DNUM=1022 %s | FileCheck %s -check-prefixes=CHECK,CONV
57+
4458
// Test vector-enabled unary intrinsics that take float-like parameters and
4559
// and are "trivial" in that they can be implemented with a single call
4660
// instruction with the same parameter and return types.
@@ -64,6 +78,9 @@ void main() {
6478
// CHECK: [[hvec:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0
6579
vector<float16_t, NUM> hVec = buf.Load<vector<float16_t, NUM> >(0);
6680

81+
// Convergent markers prevent GVN removal of redundant annotateHandle calls.
82+
// CONV: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 })
83+
6784
// CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 1024
6885
// CHECK: [[fvec:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0
6986
vector<float, NUM> fVec = buf.Load<vector<float, NUM> >(1024);
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// RUN: %dxc -T ps_6_1 -DFUNC=ddx %s | FileCheck %s --check-prefixes CHECK,PRE69
2+
// RUN: %dxc -T ps_6_1 -DFUNC=ddx_coarse %s | FileCheck %s --check-prefixes CHECK,PRE69
3+
// RUN: %dxc -T ps_6_1 -DFUNC=ddx_fine %s | FileCheck %s --check-prefixes CHECK,PRE69
4+
// RUN: %dxc -T ps_6_1 -DFUNC=ddy %s | FileCheck %s --check-prefixes CHECK,PRE69
5+
// RUN: %dxc -T ps_6_1 -DFUNC=ddy_coarse %s | FileCheck %s --check-prefixes CHECK,PRE69
6+
// RUN: %dxc -T ps_6_1 -DFUNC=ddy_fine %s | FileCheck %s --check-prefixes CHECK,PRE69
7+
8+
// RUN: %dxc -T ps_6_9 -DFUNC=ddx %s | FileCheck %s --check-prefixes CHECK,SM69
9+
// RUN: %dxc -T ps_6_9 -DFUNC=ddx_coarse %s | FileCheck %s --check-prefixes CHECK,SM69
10+
// RUN: %dxc -T ps_6_9 -DFUNC=ddx_fine %s | FileCheck %s --check-prefixes CHECK,SM69
11+
// RUN: %dxc -T ps_6_9 -DFUNC=ddy %s | FileCheck %s --check-prefixes CHECK,SM69
12+
// RUN: %dxc -T ps_6_9 -DFUNC=ddy_coarse %s | FileCheck %s --check-prefixes CHECK,SM69
13+
// RUN: %dxc -T ps_6_9 -DFUNC=ddy_fine %s | FileCheck %s --check-prefixes CHECK,SM69
14+
15+
// Make sure add(s) are not sunk into the conditional block.
16+
// SM69: fadd fast <2 x float>
17+
// PRE69: fadd fast float
18+
// PRE69: fadd fast float
19+
// CHECK: icmp sgt
20+
// CHECK-NEXT: br i1
21+
22+
// Source for test of dxil-convergent pass.
23+
24+
float2 main(float2 a:A, float2 b:B, int c:C) : SV_Target {
25+
26+
float2 coord = a + b;
27+
float2 res = 0;
28+
if (c > 2)
29+
res -= FUNC(coord);
30+
31+
return res;
32+
33+
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
; RUN: %dxopt %s -hlsl-passes-resume -hlsl-dxil-convergent-mark -S | FileCheck %s
2+
3+
target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
4+
target triple = "dxil-ms-dx"
5+
6+
; Function Attrs: nounwind readnone
7+
declare <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x float>)"(i32, <2 x float>) #1
8+
9+
; Function Attrs: nounwind
10+
define void @main(<2 x float>* noalias %arg, <2 x float> %arg1, <2 x float> %arg2, i32 %arg3) #0 {
11+
bb:
12+
13+
%tmp = fadd <2 x float> %arg1, %arg2
14+
; CHECK: [[vec:%.*]] = call <2 x float> @"dxil.convergent.marker.<2 x float>"(<2 x float> %tmp)
15+
%tmp4 = icmp sgt i32 %arg3, 2
16+
%tmp5 = icmp ne i1 %tmp4, false
17+
%tmp6 = icmp ne i1 %tmp5, false
18+
br i1 %tmp6, label %bb7, label %bb10
19+
20+
bb7: ; preds = %bb
21+
; CHECK: call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x float>)"(i32 128, <2 x float> [[vec]])
22+
%tmp8 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x float>)"(i32 128, <2 x float> %tmp)
23+
%tmp9 = fsub <2 x float> zeroinitializer, %tmp8
24+
br label %bb10
25+
26+
bb10: ; preds = %bb7, %bb
27+
%res.0 = phi <2 x float> [ %tmp9, %bb7 ], [ zeroinitializer, %bb ]
28+
store <2 x float> %res.0, <2 x float>* %arg
29+
ret void
30+
}
31+
32+
attributes #0 = { nounwind }
33+
attributes #1 = { nounwind readnone }
34+
35+
!llvm.module.flags = !{!0}
36+
!pauseresume = !{!1}
37+
!llvm.ident = !{!2}
38+
!dx.version = !{!3}
39+
!dx.valver = !{!3}
40+
!dx.shaderModel = !{!4}
41+
!dx.typeAnnotations = !{!5}
42+
!dx.entryPoints = !{!18}
43+
!dx.fnprops = !{!19}
44+
!dx.options = !{!20, !21}
45+
46+
!0 = !{i32 2, !"Debug Info Version", i32 3}
47+
!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
48+
!2 = !{!"dxc(private) 1.8.0.4959 (coopvec-tests, 43e1db83c-dirty)"}
49+
!3 = !{i32 1, i32 9}
50+
!4 = !{!"ps", i32 6, i32 9}
51+
!5 = !{i32 1, void (<2 x float>*, <2 x float>, <2 x float>, i32)* @main, !6}
52+
!6 = !{!7, !9, !12, !14, !16}
53+
!7 = !{i32 0, !8, !8}
54+
!8 = !{}
55+
!9 = !{i32 1, !10, !11}
56+
!10 = !{i32 4, !"SV_Target", i32 7, i32 9}
57+
!11 = !{i32 0}
58+
!12 = !{i32 0, !13, !11}
59+
!13 = !{i32 4, !"A", i32 7, i32 9}
60+
!14 = !{i32 0, !15, !11}
61+
!15 = !{i32 4, !"B", i32 7, i32 9}
62+
!16 = !{i32 0, !17, !11}
63+
!17 = !{i32 4, !"C", i32 7, i32 4}
64+
!18 = !{void (<2 x float>*, <2 x float>, <2 x float>, i32)* @main, !"main", null, null, null}
65+
!19 = !{void (<2 x float>*, <2 x float>, <2 x float>, i32)* @main, i32 0, i1 false}
66+
!20 = !{i32 64}
67+
!21 = !{i32 -1}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
; RUN: %dxopt %s -hlsl-passes-resume -hlsl-dxil-convergent-mark -S | FileCheck %s
2+
3+
target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
4+
target triple = "dxil-ms-dx"
5+
6+
; Function Attrs: nounwind readnone
7+
declare <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x float>)"(i32, <2 x float>) #1
8+
9+
; Function Attrs: nounwind
10+
define void @main(<2 x float>* noalias %arg, <2 x float> %arg1, <2 x float> %arg2, i32 %arg3) #0 {
11+
bb:
12+
; CHECK: [[val:%.*]] = extractelement <2 x float> %tmp, i64 0
13+
; CHECK: [[conv:%.*]] = call float @dxil.convergent.marker.float(float [[val]])
14+
; CHECK: [[vec0:%.*]] = insertelement <2 x float> undef, float [[conv]], i64 0
15+
; CHECK: [[val:%.*]] = extractelement <2 x float> %tmp, i64 1
16+
; CHECK: [[conv:%.*]] = call float @dxil.convergent.marker.float(float [[val]])
17+
; CHECK: [[vec:%.*]] = insertelement <2 x float> [[vec0]], float [[conv]], i64 1
18+
%tmp = fadd <2 x float> %arg1, %arg2
19+
%tmp4 = icmp sgt i32 %arg3, 2
20+
%tmp5 = icmp ne i1 %tmp4, false
21+
%tmp6 = icmp ne i1 %tmp5, false
22+
br i1 %tmp6, label %bb7, label %bb10
23+
24+
bb7: ; preds = %bb
25+
; CHECK: call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x float>)"(i32 128, <2 x float> [[vec]])
26+
%tmp8 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x float>)"(i32 128, <2 x float> %tmp)
27+
%tmp9 = fsub <2 x float> zeroinitializer, %tmp8
28+
br label %bb10
29+
30+
bb10: ; preds = %bb7, %bb
31+
%res.0 = phi <2 x float> [ %tmp9, %bb7 ], [ zeroinitializer, %bb ]
32+
store <2 x float> %res.0, <2 x float>* %arg
33+
ret void
34+
}
35+
36+
attributes #0 = { nounwind }
37+
attributes #1 = { nounwind readnone }
38+
39+
!llvm.module.flags = !{!0}
40+
!pauseresume = !{!1}
41+
!llvm.ident = !{!2}
42+
!dx.version = !{!3}
43+
!dx.valver = !{!4}
44+
!dx.shaderModel = !{!5}
45+
!dx.typeAnnotations = !{!6}
46+
!dx.entryPoints = !{!19}
47+
!dx.fnprops = !{!20}
48+
!dx.options = !{!21, !22}
49+
50+
!0 = !{i32 2, !"Debug Info Version", i32 3}
51+
!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
52+
!2 = !{!"dxc(private) 1.8.0.4959 (coopvec-tests, 43e1db83c-dirty)"}
53+
!3 = !{i32 1, i32 8}
54+
!4 = !{i32 1, i32 9}
55+
!5 = !{!"ps", i32 6, i32 8}
56+
!6 = !{i32 1, void (<2 x float>*, <2 x float>, <2 x float>, i32)* @main, !7}
57+
!7 = !{!8, !10, !13, !15, !17}
58+
!8 = !{i32 0, !9, !9}
59+
!9 = !{}
60+
!10 = !{i32 1, !11, !12}
61+
!11 = !{i32 4, !"SV_Target", i32 7, i32 9}
62+
!12 = !{i32 0}
63+
!13 = !{i32 0, !14, !12}
64+
!14 = !{i32 4, !"A", i32 7, i32 9}
65+
!15 = !{i32 0, !16, !12}
66+
!16 = !{i32 4, !"B", i32 7, i32 9}
67+
!17 = !{i32 0, !18, !12}
68+
!18 = !{i32 4, !"C", i32 7, i32 4}
69+
!19 = !{void (<2 x float>*, <2 x float>, <2 x float>, i32)* @main, !"main", null, null, null}
70+
!20 = !{void (<2 x float>*, <2 x float>, <2 x float>, i32)* @main, i32 0, i1 false}
71+
!21 = !{i32 64}
72+
!22 = !{i32 -1}
73+
!23 = !DILocation(line: 26, column: 20, scope: !24)
74+
!24 = !DISubprogram(name: "main", scope: !25, file: !25, line: 24, type: !26, isLocal: false, isDefinition: true, scopeLine: 24, flags: DIFlagPrototyped, isOptimized: false, function: void (<2 x float>*, <2 x float>, <2 x float>, i32)* @main)
75+
!25 = !DIFile(filename: "/Users/pow2clk/dxc/tools/clang/test/CodeGenDXIL/passes/convergent-derivs.hlsl", directory: "")
76+
!26 = !DISubroutineType(types: !9)
77+
!27 = !DILocation(line: 28, column: 9, scope: !24)
78+
!28 = !DILocation(line: 28, column: 7, scope: !24)
79+
!29 = !DILocation(line: 29, column: 12, scope: !24)
80+
!30 = !DILocation(line: 29, column: 9, scope: !24)
81+
!31 = !DILocation(line: 29, column: 5, scope: !24)
82+
!32 = !DILocation(line: 31, column: 3, scope: !24)

tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,6 @@ void main() {
6060
// CHECK: fmul fast <13 x float> [[mul]], [[sub]]
6161
fRes += smoothstep(fVec1, fVec2, fVec3);
6262

63-
// Intrinsics that expand into llvm ops.
64-
6563
// CHECK: fmul fast <13 x float> [[fvec3]], <float 0x3F91DF46A0000000
6664
fRes += radians(fVec3);
6765

@@ -82,6 +80,11 @@ void main() {
8280
// CHECK: fmul fast <13 x half> [[tmp]], [[hvec1]]
8381
hRes += lerp(hVec2, hVec3, hVec1);
8482

83+
// CHECK: [[tmp:%.*]] = call <13 x float> @dx.op.unary.v13f32(i32 83, <13 x float> [[fvec1]]) ; DerivCoarseX(value)
84+
// CHECK: call <13 x float> @dx.op.unary.v13f32(i32 6, <13 x float> [[tmp]]) ; FAbs(value)
85+
// CHECK: [[tmp:%.*]] = call <13 x float> @dx.op.unary.v13f32(i32 84, <13 x float> [[fvec1]]) ; DerivCoarseY(value)
86+
// CHECK: call <13 x float> @dx.op.unary.v13f32(i32 6, <13 x float> [[tmp]]) ; FAbs(value)
87+
fRes += fwidth(fVec1);
8588

8689
// CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4)
8790
// CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0
@@ -165,6 +168,9 @@ void main() {
165168
// CHECK: call <13 x float> @dx.op.tertiary.v13f32(i32 46, <13 x float> [[fvec1]], <13 x float> [[fvec2]], <13 x float> [[fvec3]]) ; FMad(a,b,c)
166169
fRes += mad(fVec1, fVec2, fVec3);
167170

171+
// CHECK: call <13 x half> @dx.op.unary.v13f16(i32 85, <13 x half> [[hvec1]]) ; DerivFineX(value)
172+
hRes += ddx_fine(hVec1);
173+
168174
// CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8)
169175
// CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0
170176
// CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8)

0 commit comments

Comments
 (0)