1- ; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
1+ ; RUN: opt -S -scalarizer -dxil-op-lower < %s | FileCheck %s
22
33; Make sure dxil operation function calls for umad are generated for i16, i32 and i64, in both scalar and <4 x ...> vector form.
4- ; CHECK:call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
5- ; CHECK:call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
6- ; CHECK:call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
7-
8- ; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
94
105target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
116target triple = "dxil-pc-shadermodel6.7-library"
127; Function Attrs: noinline nounwind optnone
138define noundef i16 @umad_ushort (i16 noundef %p0 , i16 noundef %p1 , i16 noundef %p2 ) #0 {
149entry:
10+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
1511 %p2.addr = alloca i16 , align 2
1612 %p1.addr = alloca i16 , align 2
1713 %p0.addr = alloca i16 , align 2
@@ -31,6 +27,7 @@ declare i16 @llvm.dx.umad.i16(i16, i16, i16) #1
3127; Function Attrs: noinline nounwind optnone
3228define noundef i32 @umad_uint (i32 noundef %p0 , i32 noundef %p1 , i32 noundef %p2 ) #0 {
3329entry:
30+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
3431 %p2.addr = alloca i32 , align 4
3532 %p1.addr = alloca i32 , align 4
3633 %p0.addr = alloca i32 , align 4
@@ -50,6 +47,7 @@ declare i32 @llvm.dx.umad.i32(i32, i32, i32) #1
5047; Function Attrs: noinline nounwind optnone
5148define noundef i64 @umad_uint64 (i64 noundef %p0 , i64 noundef %p1 , i64 noundef %p2 ) #0 {
5249entry:
50+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
5351 %p2.addr = alloca i64 , align 8
5452 %p1.addr = alloca i64 , align 8
5553 %p0.addr = alloca i64 , align 8
@@ -65,3 +63,95 @@ entry:
6563
6664; Function Attrs: nocallback nofree nosync nounwind willreturn
6765declare i64 @llvm.dx.umad.i64 (i64 , i64 , i64 ) #1
66+
67+ ; Function Attrs: noinline nounwind optnone
68+ define noundef <4 x i16 > @umad_uint16_t4 (<4 x i16 > noundef %p0 , <4 x i16 > noundef %p1 , <4 x i16 > noundef %p2 ) #0 {
69+ entry:
70+ ; CHECK: extractelement <4 x i16> %p0, i64 0
71+ ; CHECK: extractelement <4 x i16> %p1, i64 0
72+ ; CHECK: extractelement <4 x i16> %p2, i64 0
73+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
74+ ; CHECK: extractelement <4 x i16> %p0, i64 1
75+ ; CHECK: extractelement <4 x i16> %p1, i64 1
76+ ; CHECK: extractelement <4 x i16> %p2, i64 1
77+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
78+ ; CHECK: extractelement <4 x i16> %p0, i64 2
79+ ; CHECK: extractelement <4 x i16> %p1, i64 2
80+ ; CHECK: extractelement <4 x i16> %p2, i64 2
81+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
82+ ; CHECK: extractelement <4 x i16> %p0, i64 3
83+ ; CHECK: extractelement <4 x i16> %p1, i64 3
84+ ; CHECK: extractelement <4 x i16> %p2, i64 3
85+ ; CHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
86+ ; CHECK: insertelement <4 x i16> poison, i16 %{{.*}}, i64 0
87+ ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 1
88+ ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 2
89+ ; CHECK: insertelement <4 x i16> %{{.*}}, i16 %{{.*}}, i64 3
90+ %dx.umad = call <4 x i16 > @llvm.dx.umad.v4i16 (<4 x i16 > %p0 , <4 x i16 > %p1 , <4 x i16 > %p2 )
91+ ret <4 x i16 > %dx.umad
92+ }
93+
94+ ; Function Attrs: nocallback nofree nosync nounwind willreturn
95+ declare <4 x i16 > @llvm.dx.umad.v4i16 (<4 x i16 >, <4 x i16 >, <4 x i16 >) #1
96+
97+ ; Function Attrs: noinline nounwind optnone
98+ define noundef <4 x i32 > @umad_uint4 (<4 x i32 > noundef %p0 , <4 x i32 > noundef %p1 , <4 x i32 > noundef %p2 ) #0 {
99+ entry:
100+ ; CHECK: extractelement <4 x i32> %p0, i64 0
101+ ; CHECK: extractelement <4 x i32> %p1, i64 0
102+ ; CHECK: extractelement <4 x i32> %p2, i64 0
103+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
104+ ; CHECK: extractelement <4 x i32> %p0, i64 1
105+ ; CHECK: extractelement <4 x i32> %p1, i64 1
106+ ; CHECK: extractelement <4 x i32> %p2, i64 1
107+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
108+ ; CHECK: extractelement <4 x i32> %p0, i64 2
109+ ; CHECK: extractelement <4 x i32> %p1, i64 2
110+ ; CHECK: extractelement <4 x i32> %p2, i64 2
111+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
112+ ; CHECK: extractelement <4 x i32> %p0, i64 3
113+ ; CHECK: extractelement <4 x i32> %p1, i64 3
114+ ; CHECK: extractelement <4 x i32> %p2, i64 3
115+ ; CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
116+ ; CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i64 0
117+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 1
118+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 2
119+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i64 3
120+ %dx.umad = call <4 x i32 > @llvm.dx.umad.v4i32 (<4 x i32 > %p0 , <4 x i32 > %p1 , <4 x i32 > %p2 )
121+ ret <4 x i32 > %dx.umad
122+ }
123+
124+ ; Function Attrs: nocallback nofree nosync nounwind willreturn
125+ declare <4 x i32 > @llvm.dx.umad.v4i32 (<4 x i32 >, <4 x i32 >, <4 x i32 >) #1
126+
127+ ; Function Attrs: noinline nounwind optnone
128+ define noundef <4 x i64 > @umad_uint64_t4 (<4 x i64 > noundef %p0 , <4 x i64 > noundef %p1 , <4 x i64 > noundef %p2 ) #0 {
129+ entry:
130+ ; CHECK: extractelement <4 x i64> %p0, i64 0
131+ ; CHECK: extractelement <4 x i64> %p1, i64 0
132+ ; CHECK: extractelement <4 x i64> %p2, i64 0
133+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
134+ ; CHECK: extractelement <4 x i64> %p0, i64 1
135+ ; CHECK: extractelement <4 x i64> %p1, i64 1
136+ ; CHECK: extractelement <4 x i64> %p2, i64 1
137+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
138+ ; CHECK: extractelement <4 x i64> %p0, i64 2
139+ ; CHECK: extractelement <4 x i64> %p1, i64 2
140+ ; CHECK: extractelement <4 x i64> %p2, i64 2
141+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
142+ ; CHECK: extractelement <4 x i64> %p0, i64 3
143+ ; CHECK: extractelement <4 x i64> %p1, i64 3
144+ ; CHECK: extractelement <4 x i64> %p2, i64 3
145+ ; CHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
146+ ; CHECK: insertelement <4 x i64> poison, i64 %{{.*}}, i64 0
147+ ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 1
148+ ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 2
149+ ; CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i64 3
150+ %dx.umad = call <4 x i64 > @llvm.dx.umad.v4i64 (<4 x i64 > %p0 , <4 x i64 > %p1 , <4 x i64 > %p2 )
151+ ret <4 x i64 > %dx.umad
152+ }
153+
154+ ; Function Attrs: nocallback nofree nosync nounwind willreturn
155+ declare <4 x i64 > @llvm.dx.umad.v4i64 (<4 x i64 >, <4 x i64 >, <4 x i64 >) #1
156+
157+ ; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
0 commit comments