Skip to content

Commit 231d648

Browse files
author
Greg Roth
authored
[SM6.9] Disable native vec deriv ops and expand testing (#7432)
Several intrinsics that were enabled late for native vectors had their testing removed, because that testing expected scalarized forms. This change adds tests for pow, modf, and abs in their native vector intrinsic forms. It also removes the native vector intrinsics for the derivative operations, since they require more scalarization removal due to their convergent markers, and restores the scalarized testing for them. The 1024 size was removed from longvec-intrinsics because the verbose way that constant vectors are represented in the disassembly made the test take significantly longer.
1 parent 8df7449 commit 231d648

File tree

5 files changed

+70
-13
lines changed

5 files changed

+70
-13
lines changed

lib/DXIL/DxilOperations.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -765,32 +765,32 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
765765
"unary",
766766
Attribute::ReadNone,
767767
1,
768-
{{0x403}},
769-
{{0x3}}}, // Overloads: hf<hf
768+
{{0x3}},
769+
{{0x0}}}, // Overloads: hf
770770
{OC::DerivCoarseY,
771771
"DerivCoarseY",
772772
OCC::Unary,
773773
"unary",
774774
Attribute::ReadNone,
775775
1,
776-
{{0x403}},
777-
{{0x3}}}, // Overloads: hf<hf
776+
{{0x3}},
777+
{{0x0}}}, // Overloads: hf
778778
{OC::DerivFineX,
779779
"DerivFineX",
780780
OCC::Unary,
781781
"unary",
782782
Attribute::ReadNone,
783783
1,
784-
{{0x403}},
785-
{{0x3}}}, // Overloads: hf<hf
784+
{{0x3}},
785+
{{0x0}}}, // Overloads: hf
786786
{OC::DerivFineY,
787787
"DerivFineY",
788788
OCC::Unary,
789789
"unary",
790790
Attribute::ReadNone,
791791
1,
792-
{{0x403}},
793-
{{0x3}}}, // Overloads: hf<hf
792+
{{0x3}},
793+
{{0x0}}}, // Overloads: hf
794794

795795
// Pixel shader
796796
{OC::EvalSnapped,

tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=7 %s | FileCheck %s
33
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=125 %s | FileCheck %s
44
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=256 %s | FileCheck %s
5-
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=1024 %s | FileCheck %s
65

76
// Test vector-enabled non-trivial intrinsics that take parameters of various types.
87

@@ -203,6 +202,36 @@ void main() {
203202
// CHECK: fmul fast <[[NUM]] x float> [[tmp]], <float 0x3FE62E4300000000
204203
fRes += log(fVec1);
205204

205+
// CHECK-NOT: extractelement
206+
// CHECK-NOT: insertelement
207+
// CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 23, <[[NUM]] x half> [[hvec2]]) ; Log(value)
208+
// CHECK: [[tmp2:%.*]] = fmul fast <[[NUM]] x half> [[tmp]], [[hvec1]]
209+
// CHECK: call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 21, <[[NUM]] x half> [[tmp2]]) ; Exp(value)
210+
hRes += pow(hVec2, hVec1);
211+
212+
// CHECK-NOT: extractelement
213+
// CHECK-NOT: insertelement
214+
// CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 23, <[[NUM]] x float> [[fvec2]]) ; Log(value)
215+
// CHECK: [[tmp2:%.*]] = fmul fast <[[NUM]] x float> [[tmp]], [[fvec1]]
216+
// CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[tmp2]]) ; Exp(value)
217+
fRes += pow(fVec2, fVec1);
218+
219+
vector<half, NUM> hVal;
220+
// CHECK-NOT: extractelement
221+
// CHECK-NOT: insertelement
222+
// CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 29, <[[NUM]] x half> [[hvec1]]) ; Round_z(value)
223+
// CHECK: fsub fast <[[NUM]] x half> [[hvec1]], [[tmp]]
224+
hRes *= modf(hVec1, hVal);
225+
hRes += hVal;
226+
227+
vector<float, NUM> fVal;
228+
// CHECK-NOT: extractelement
229+
// CHECK-NOT: insertelement
230+
// CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 29, <[[NUM]] x float> [[fvec1]]) ; Round_z(value)
231+
// CHECK: fsub fast <[[NUM]] x float> [[fvec1]], [[tmp]]
232+
fRes *= modf(fVec1, fVal);
233+
fRes += fVal;
234+
206235
// CHECK-NOT: extractelement
207236
// CHECK-NOT: insertelement
208237
// CHECK: [[sub:%.*]] = fsub fast <[[NUM]] x half> [[hvec2]], [[hvec1]]
@@ -227,6 +256,25 @@ void main() {
227256
// CHECK: fmul fast <[[NUM]] x float> [[mul]], [[sub]]
228257
fRes += smoothstep(fVec1, fVec2, fVec3);
229258

259+
// Note that Fabs is tested in longvec-trivial-unary-float-intrinsics.
260+
// CHECK-NOT: extractelement
261+
// CHECK-NOT: insertelement
262+
// CHECK: [[tmp:%.*]] = sub <[[NUM]] x i16> zeroinitializer, [[svec1]]
263+
// CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 37, <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[tmp]]) ; IMax(a,b)
264+
sRes += abs(sVec1);
265+
266+
// CHECK-NOT: extractelement
267+
// CHECK-NOT: insertelement
268+
// CHECK: [[tmp:%.*]] = sub <[[NUM]] x i32> zeroinitializer, [[ivec1]]
269+
// CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 37, <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[tmp]]) ; IMax(a,b)
270+
iRes += abs(iVec1);
271+
272+
// CHECK-NOT: extractelement
273+
// CHECK-NOT: insertelement
274+
// CHECK: [[tmp:%.*]] = sub <[[NUM]] x i64> zeroinitializer, [[lvec1]]
275+
// CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 37, <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[tmp]]) ; IMax(a,b)
276+
lRes += abs(lVec1);
277+
230278
// Intrinsics that expand into llvm ops.
231279

232280
// CHECK-NOT: extractelement

tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,13 @@
99
// RUN: %dxc -DFUNC=countbits -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
1010
// RUN: %dxc -DFUNC=firstbithigh -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
1111
// RUN: %dxc -DFUNC=firstbitlow -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
12+
// RUN: %dxc -DFUNC=ddx -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
13+
// RUN: %dxc -DFUNC=ddx_coarse -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
14+
// RUN: %dxc -DFUNC=ddx_fine -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
15+
// RUN: %dxc -DFUNC=ddy -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
16+
// RUN: %dxc -DFUNC=ddy_coarse -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
17+
// RUN: %dxc -DFUNC=ddy_fine -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
18+
// RUN: %dxc -DFUNC=fwidth -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
1219
// RUN: %dxc -DFUNC=QuadReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD
1320
// RUN: %dxc -DFUNC=QuadReadAcrossX -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD
1421
// RUN: %dxc -DFUNC=QuadReadAcrossY -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD

tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=abs -DOP=6 -DNUM=7 %s | FileCheck %s
2+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=abs -DOP=6 -DNUM=1022 %s | FileCheck %s
13
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=7 %s | FileCheck %s
24
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=1022 %s | FileCheck %s
35
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=7 %s | FileCheck %s

utils/hct/hctdb.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2629,7 +2629,7 @@ def UFI(name, **mappings):
26292629
next_op_idx,
26302630
"Unary",
26312631
"computes the rate of change of components per stamp",
2632-
"hf<",
2632+
"hf",
26332633
"rn",
26342634
[
26352635
db_dxil_param(
@@ -2647,7 +2647,7 @@ def UFI(name, **mappings):
26472647
next_op_idx,
26482648
"Unary",
26492649
"computes the rate of change of components per stamp",
2650-
"hf<",
2650+
"hf",
26512651
"rn",
26522652
[
26532653
db_dxil_param(
@@ -2665,7 +2665,7 @@ def UFI(name, **mappings):
26652665
next_op_idx,
26662666
"Unary",
26672667
"computes the rate of change of components per pixel",
2668-
"hf<",
2668+
"hf",
26692669
"rn",
26702670
[
26712671
db_dxil_param(
@@ -2683,7 +2683,7 @@ def UFI(name, **mappings):
26832683
next_op_idx,
26842684
"Unary",
26852685
"computes the rate of change of components per pixel",
2686-
"hf<",
2686+
"hf",
26872687
"rn",
26882688
[
26892689
db_dxil_param(

0 commit comments

Comments
 (0)