Skip to content

Commit c9d80c5

Browse files
committed
scalarize dx_imad and dx_umad, add tests
1 parent d44754c commit c9d80c5

File tree

3 files changed

+194
-12
lines changed

3 files changed

+194
-12
lines changed

llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
5656
case Intrinsic::dx_wave_reduce_sum:
5757
case Intrinsic::dx_wave_reduce_umax:
5858
case Intrinsic::dx_wave_reduce_usum:
59+
case Intrinsic::dx_imad:
60+
case Intrinsic::dx_umad:
5961
return true;
6062
default:
6163
return false;

llvm/test/CodeGen/DirectX/imad.ll

Lines changed: 96 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
1-
; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
1+
; RUN: opt -S -scalarizer -dxil-op-lower < %s | FileCheck %s
22

33
; Make sure dxil operation function calls for imad are generated for i16, i32, and i64, both for scalars and for <4 x N> vectors after scalarization.
4-
; CHECK:call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
5-
; CHECK:call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
6-
; CHECK:call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
7-
8-
; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
94

105
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
116
target triple = "dxil-pc-shadermodel6.7-library"
127
; Function Attrs: noinline nounwind optnone
138
define noundef i16 @imad_short(i16 noundef %p0, i16 noundef %p1, i16 noundef %p2) #0 {
149
entry:
10+
; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
1511
%p2.addr = alloca i16, align 2
1612
%p1.addr = alloca i16, align 2
1713
%p0.addr = alloca i16, align 2
@@ -31,6 +27,7 @@ declare i16 @llvm.dx.imad.i16(i16, i16, i16) #1
3127
; Function Attrs: noinline nounwind optnone
3228
define noundef i32 @imad_int(i32 noundef %p0, i32 noundef %p1, i32 noundef %p2) #0 {
3329
entry:
30+
; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
3431
%p2.addr = alloca i32, align 4
3532
%p1.addr = alloca i32, align 4
3633
%p0.addr = alloca i32, align 4
@@ -50,6 +47,7 @@ declare i32 @llvm.dx.imad.i32(i32, i32, i32) #1
5047
; Function Attrs: noinline nounwind optnone
5148
define noundef i64 @imad_int64(i64 noundef %p0, i64 noundef %p1, i64 noundef %p2) #0 {
5249
entry:
50+
; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
5351
%p2.addr = alloca i64, align 8
5452
%p1.addr = alloca i64, align 8
5553
%p0.addr = alloca i64, align 8
@@ -65,3 +63,95 @@ entry:
6563

6664
; Function Attrs: nocallback nofree nosync nounwind willreturn
6765
declare i64 @llvm.dx.imad.i64(i64, i64, i64) #1
66+
67+
; Function Attrs: noinline nounwind optnone
68+
define noundef <4 x i16> @imad_int16_t4(<4 x i16> noundef %p0, <4 x i16> noundef %p1, <4 x i16> noundef %p2) #0 {
69+
entry:
70+
; CHECK: extractelement <4 x i16> %p0, i64 0
71+
; CHECK: extractelement <4 x i16> %p1, i64 0
72+
; CHECK: extractelement <4 x i16> %p2, i64 0
73+
; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
74+
; CHECK: extractelement <4 x i16> %p0, i64 1
75+
; CHECK: extractelement <4 x i16> %p1, i64 1
76+
; CHECK: extractelement <4 x i16> %p2, i64 1
77+
; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
78+
; CHECK: extractelement <4 x i16> %p0, i64 2
79+
; CHECK: extractelement <4 x i16> %p1, i64 2
80+
; CHECK: extractelement <4 x i16> %p2, i64 2
81+
; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
82+
; CHECK: extractelement <4 x i16> %p0, i64 3
83+
; CHECK: extractelement <4 x i16> %p1, i64 3
84+
; CHECK: extractelement <4 x i16> %p2, i64 3
85+
; CHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
86+
; CHECK: insertelement <4 x i16> poison, i16 %{{.*}}, i64 0
87+
; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 1
88+
; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 2
89+
; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 3
90+
%dx.imad = call <4 x i16> @llvm.dx.imad.v4i16(<4 x i16> %p0, <4 x i16> %p1, <4 x i16> %p2)
91+
ret <4 x i16> %dx.imad
92+
}
93+
94+
; Function Attrs: nocallback nofree nosync nounwind willreturn
95+
declare <4 x i16> @llvm.dx.imad.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #1
96+
97+
; Function Attrs: noinline nounwind optnone
98+
define noundef <4 x i32> @imad_int4(<4 x i32> noundef %p0, <4 x i32> noundef %p1, <4 x i32> noundef %p2) #0 {
99+
entry:
100+
; CHECK: extractelement <4 x i32> %p0, i64 0
101+
; CHECK: extractelement <4 x i32> %p1, i64 0
102+
; CHECK: extractelement <4 x i32> %p2, i64 0
103+
; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
104+
; CHECK: extractelement <4 x i32> %p0, i64 1
105+
; CHECK: extractelement <4 x i32> %p1, i64 1
106+
; CHECK: extractelement <4 x i32> %p2, i64 1
107+
; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
108+
; CHECK: extractelement <4 x i32> %p0, i64 2
109+
; CHECK: extractelement <4 x i32> %p1, i64 2
110+
; CHECK: extractelement <4 x i32> %p2, i64 2
111+
; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
112+
; CHECK: extractelement <4 x i32> %p0, i64 3
113+
; CHECK: extractelement <4 x i32> %p1, i64 3
114+
; CHECK: extractelement <4 x i32> %p2, i64 3
115+
; CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
116+
; CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i64 0
117+
; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 1
118+
; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 2
119+
; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 3
120+
%dx.imad = call <4 x i32> @llvm.dx.imad.v4i32(<4 x i32> %p0, <4 x i32> %p1, <4 x i32> %p2)
121+
ret <4 x i32> %dx.imad
122+
}
123+
124+
; Function Attrs: nocallback nofree nosync nounwind willreturn
125+
declare <4 x i32> @llvm.dx.imad.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
126+
127+
; Function Attrs: noinline nounwind optnone
128+
define noundef <4 x i64> @imad_int64_t4(<4 x i64> noundef %p0, <4 x i64> noundef %p1, <4 x i64> noundef %p2) #0 {
129+
entry:
130+
; CHECK: extractelement <4 x i64> %p0, i64 0
131+
; CHECK: extractelement <4 x i64> %p1, i64 0
132+
; CHECK: extractelement <4 x i64> %p2, i64 0
133+
; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
134+
; CHECK: extractelement <4 x i64> %p0, i64 1
135+
; CHECK: extractelement <4 x i64> %p1, i64 1
136+
; CHECK: extractelement <4 x i64> %p2, i64 1
137+
; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
138+
; CHECK: extractelement <4 x i64> %p0, i64 2
139+
; CHECK: extractelement <4 x i64> %p1, i64 2
140+
; CHECK: extractelement <4 x i64> %p2, i64 2
141+
; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
142+
; CHECK: extractelement <4 x i64> %p0, i64 3
143+
; CHECK: extractelement <4 x i64> %p1, i64 3
144+
; CHECK: extractelement <4 x i64> %p2, i64 3
145+
; CHECK: call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
146+
; CHECK: insertelement <4 x i64> poison, i64 %{{.*}}, i64 0
147+
; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 1
148+
; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 2
149+
; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 3
150+
%dx.imad = call <4 x i64> @llvm.dx.imad.v4i64(<4 x i64> %p0, <4 x i64> %p1, <4 x i64> %p2)
151+
ret <4 x i64> %dx.imad
152+
}
153+
154+
; Function Attrs: nocallback nofree nosync nounwind willreturn
155+
declare <4 x i64> @llvm.dx.imad.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) #1
156+
157+
; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}

llvm/test/CodeGen/DirectX/umad.ll

Lines changed: 96 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
1-
; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
1+
; RUN: opt -S -scalarizer -dxil-op-lower < %s | FileCheck %s
22

33
; Make sure dxil operation function calls for umad are generated for i16, i32, and i64, both for scalars and for <4 x N> vectors after scalarization.
4-
; CHECK:call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
5-
; CHECK:call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
6-
; CHECK:call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
7-
8-
; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
94

105
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
116
target triple = "dxil-pc-shadermodel6.7-library"
127
; Function Attrs: noinline nounwind optnone
138
define noundef i16 @umad_ushort(i16 noundef %p0, i16 noundef %p1, i16 noundef %p2) #0 {
149
entry:
10+
; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
1511
%p2.addr = alloca i16, align 2
1612
%p1.addr = alloca i16, align 2
1713
%p0.addr = alloca i16, align 2
@@ -31,6 +27,7 @@ declare i16 @llvm.dx.umad.i16(i16, i16, i16) #1
3127
; Function Attrs: noinline nounwind optnone
3228
define noundef i32 @umad_uint(i32 noundef %p0, i32 noundef %p1, i32 noundef %p2) #0 {
3329
entry:
30+
; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
3431
%p2.addr = alloca i32, align 4
3532
%p1.addr = alloca i32, align 4
3633
%p0.addr = alloca i32, align 4
@@ -50,6 +47,7 @@ declare i32 @llvm.dx.umad.i32(i32, i32, i32) #1
5047
; Function Attrs: noinline nounwind optnone
5148
define noundef i64 @umad_uint64(i64 noundef %p0, i64 noundef %p1, i64 noundef %p2) #0 {
5249
entry:
50+
; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
5351
%p2.addr = alloca i64, align 8
5452
%p1.addr = alloca i64, align 8
5553
%p0.addr = alloca i64, align 8
@@ -65,3 +63,95 @@ entry:
6563

6664
; Function Attrs: nocallback nofree nosync nounwind willreturn
6765
declare i64 @llvm.dx.umad.i64(i64, i64, i64) #1
66+
67+
; Function Attrs: noinline nounwind optnone
68+
define noundef <4 x i16> @umad_uint16_t4(<4 x i16> noundef %p0, <4 x i16> noundef %p1, <4 x i16> noundef %p2) #0 {
69+
entry:
70+
; CHECK: extractelement <4 x i16> %p0, i64 0
71+
; CHECK: extractelement <4 x i16> %p1, i64 0
72+
; CHECK: extractelement <4 x i16> %p2, i64 0
73+
; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
74+
; CHECK: extractelement <4 x i16> %p0, i64 1
75+
; CHECK: extractelement <4 x i16> %p1, i64 1
76+
; CHECK: extractelement <4 x i16> %p2, i64 1
77+
; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
78+
; CHECK: extractelement <4 x i16> %p0, i64 2
79+
; CHECK: extractelement <4 x i16> %p1, i64 2
80+
; CHECK: extractelement <4 x i16> %p2, i64 2
81+
; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
82+
; CHECK: extractelement <4 x i16> %p0, i64 3
83+
; CHECK: extractelement <4 x i16> %p1, i64 3
84+
; CHECK: extractelement <4 x i16> %p2, i64 3
85+
; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
86+
; CHECK: insertelement <4 x i16> poison, i16 %{{.*}}, i64 0
87+
; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 1
88+
; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 2
89+
; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 3
90+
%dx.umad = call <4 x i16> @llvm.dx.umad.v4i16(<4 x i16> %p0, <4 x i16> %p1, <4 x i16> %p2)
91+
ret <4 x i16> %dx.umad
92+
}
93+
94+
; Function Attrs: nocallback nofree nosync nounwind willreturn
95+
declare <4 x i16> @llvm.dx.umad.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #1
96+
97+
; Function Attrs: noinline nounwind optnone
98+
define noundef <4 x i32> @umad_uint4(<4 x i32> noundef %p0, <4 x i32> noundef %p1, <4 x i32> noundef %p2) #0 {
99+
entry:
100+
; CHECK: extractelement <4 x i32> %p0, i64 0
101+
; CHECK: extractelement <4 x i32> %p1, i64 0
102+
; CHECK: extractelement <4 x i32> %p2, i64 0
103+
; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
104+
; CHECK: extractelement <4 x i32> %p0, i64 1
105+
; CHECK: extractelement <4 x i32> %p1, i64 1
106+
; CHECK: extractelement <4 x i32> %p2, i64 1
107+
; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
108+
; CHECK: extractelement <4 x i32> %p0, i64 2
109+
; CHECK: extractelement <4 x i32> %p1, i64 2
110+
; CHECK: extractelement <4 x i32> %p2, i64 2
111+
; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
112+
; CHECK: extractelement <4 x i32> %p0, i64 3
113+
; CHECK: extractelement <4 x i32> %p1, i64 3
114+
; CHECK: extractelement <4 x i32> %p2, i64 3
115+
; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
116+
; CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i64 0
117+
; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 1
118+
; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 2
119+
; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 3
120+
%dx.umad = call <4 x i32> @llvm.dx.umad.v4i32(<4 x i32> %p0, <4 x i32> %p1, <4 x i32> %p2)
121+
ret <4 x i32> %dx.umad
122+
}
123+
124+
; Function Attrs: nocallback nofree nosync nounwind willreturn
125+
declare <4 x i32> @llvm.dx.umad.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
126+
127+
; Function Attrs: noinline nounwind optnone
128+
define noundef <4 x i64> @umad_uint64_t4(<4 x i64> noundef %p0, <4 x i64> noundef %p1, <4 x i64> noundef %p2) #0 {
129+
entry:
130+
; CHECK: extractelement <4 x i64> %p0, i64 0
131+
; CHECK: extractelement <4 x i64> %p1, i64 0
132+
; CHECK: extractelement <4 x i64> %p2, i64 0
133+
; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
134+
; CHECK: extractelement <4 x i64> %p0, i64 1
135+
; CHECK: extractelement <4 x i64> %p1, i64 1
136+
; CHECK: extractelement <4 x i64> %p2, i64 1
137+
; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
138+
; CHECK: extractelement <4 x i64> %p0, i64 2
139+
; CHECK: extractelement <4 x i64> %p1, i64 2
140+
; CHECK: extractelement <4 x i64> %p2, i64 2
141+
; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
142+
; CHECK: extractelement <4 x i64> %p0, i64 3
143+
; CHECK: extractelement <4 x i64> %p1, i64 3
144+
; CHECK: extractelement <4 x i64> %p2, i64 3
145+
; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
146+
; CHECK: insertelement <4 x i64> poison, i64 %{{.*}}, i64 0
147+
; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 1
148+
; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 2
149+
; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 3
150+
%dx.umad = call <4 x i64> @llvm.dx.umad.v4i64(<4 x i64> %p0, <4 x i64> %p1, <4 x i64> %p2)
151+
ret <4 x i64> %dx.umad
152+
}
153+
154+
; Function Attrs: nocallback nofree nosync nounwind willreturn
155+
declare <4 x i64> @llvm.dx.umad.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) #1
156+
157+
; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}

0 commit comments

Comments
 (0)