@@ -15,8 +15,8 @@ declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64)
1515declare {<2 x i64 >, <2 x i1 >} @llvm.uadd.with.overflow.v2i64 (<2 x i64 >, <2 x i64 >)
1616declare {<2 x i64 >, <2 x i1 >} @llvm.usub.with.overflow.v2i64 (<2 x i64 >, <2 x i64 >)
1717
18- define hidden %struct.uint96 @add64_32 (i64 %val64A , i64 %val64B , i32 %val32 ) {
19- ; CHECK-LABEL: add64_32 :
18+ define %struct.uint96 @v_add64_32 (i64 %val64A , i64 %val64B , i32 %val32 ) {
19+ ; CHECK-LABEL: v_add64_32 :
2020; CHECK: ; %bb.0:
2121; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2222; CHECK-NEXT: v_add_co_u32_e32 v5, vcc, v0, v2
@@ -35,8 +35,8 @@ define hidden %struct.uint96 @add64_32(i64 %val64A, i64 %val64B, i32 %val32) {
3535 ret %struct.uint96 %.fca.1.insert
3636}
3737
38- define <2 x i64 > @uadd_v2i64 (<2 x i64 > %val0 , <2 x i64 > %val1 , ptr %ptrval ) {
39- ; CHECK-LABEL: uadd_v2i64 :
38+ define <2 x i64 > @v_uadd_v2i64 (<2 x i64 > %val0 , <2 x i64 > %val1 , ptr %ptrval ) {
39+ ; CHECK-LABEL: v_uadd_v2i64 :
4040; CHECK: ; %bb.0:
4141; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4242; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, v2, v6
@@ -60,8 +60,8 @@ define <2 x i64> @uadd_v2i64(<2 x i64> %val0, <2 x i64> %val1, ptr %ptrval) {
6060 ret <2 x i64 > %res
6161}
6262
63- define <2 x i64 > @usub_v2i64 (<2 x i64 > %val0 , <2 x i64 > %val1 , ptr %ptrval ) {
64- ; CHECK-LABEL: usub_v2i64 :
63+ define <2 x i64 > @v_usub_v2i64 (<2 x i64 > %val0 , <2 x i64 > %val1 , ptr %ptrval ) {
64+ ; CHECK-LABEL: v_usub_v2i64 :
6565; CHECK: ; %bb.0:
6666; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6767; CHECK-NEXT: v_sub_co_u32_e32 v6, vcc, v2, v6
@@ -85,8 +85,8 @@ define <2 x i64> @usub_v2i64(<2 x i64> %val0, <2 x i64> %val1, ptr %ptrval) {
8585 ret <2 x i64 > %res
8686}
8787
88- define i64 @uadd_i64 (i64 %val0 , i64 %val1 , ptr %ptrval ) {
89- ; CHECK-LABEL: uadd_i64 :
88+ define i64 @v_uadd_i64 (i64 %val0 , i64 %val1 , ptr %ptrval ) {
89+ ; CHECK-LABEL: v_uadd_i64 :
9090; CHECK: ; %bb.0:
9191; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9292; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
@@ -105,8 +105,8 @@ define i64 @uadd_i64(i64 %val0, i64 %val1, ptr %ptrval) {
105105 ret i64 %res
106106}
107107
108- define i64 @uadd_p1 (i64 %val0 , i64 %val1 , ptr %ptrval ) {
109- ; CHECK-LABEL: uadd_p1 :
108+ define i64 @v_uadd_p1 (i64 %val0 , i64 %val1 , ptr %ptrval ) {
109+ ; CHECK-LABEL: v_uadd_p1 :
110110; CHECK: ; %bb.0:
111111; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112112; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
@@ -125,8 +125,8 @@ define i64 @uadd_p1(i64 %val0, i64 %val1, ptr %ptrval) {
125125 ret i64 %res
126126}
127127
128- define i64 @uadd_n1 (i64 %val0 , i64 %val1 , ptr %ptrval ) {
129- ; CHECK-LABEL: uadd_n1 :
128+ define i64 @v_uadd_n1 (i64 %val0 , i64 %val1 , ptr %ptrval ) {
129+ ; CHECK-LABEL: v_uadd_n1 :
130130; CHECK: ; %bb.0:
131131; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132132; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, -1, v0
@@ -145,8 +145,8 @@ define i64 @uadd_n1(i64 %val0, i64 %val1, ptr %ptrval) {
145145 ret i64 %res
146146}
147147
148- define i64 @usub_p1 (i64 %val0 , i64 %val1 , ptr %ptrval ) {
149- ; CHECK-LABEL: usub_p1 :
148+ define i64 @v_usub_p1 (i64 %val0 , i64 %val1 , ptr %ptrval ) {
149+ ; CHECK-LABEL: v_usub_p1 :
150150; CHECK: ; %bb.0:
151151; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152152; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, -1, v0
@@ -165,8 +165,8 @@ define i64 @usub_p1(i64 %val0, i64 %val1, ptr %ptrval) {
165165 ret i64 %res
166166}
167167
168- define i64 @usub_n1 (i64 %val0 , i64 %val1 , ptr %ptrval ) {
169- ; CHECK-LABEL: usub_n1 :
168+ define i64 @v_usub_n1 (i64 %val0 , i64 %val1 , ptr %ptrval ) {
169+ ; CHECK-LABEL: v_usub_n1 :
170170; CHECK: ; %bb.0:
171171; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172172; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, 1, v0
@@ -184,3 +184,217 @@ define i64 @usub_n1(i64 %val0, i64 %val1, ptr %ptrval) {
184184 store i64 %val , ptr %ptrval
185185 ret i64 %res
186186}
187+
188+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
189+ ; Test uniform (SGPR) operands: all value arguments below are passed inreg.
190+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
191+
; 96-bit add assembled from a 64-bit add plus carry into the high 32 bits:
; overflow of the i64 add is detected via `icmp ult %sum64, %val64A` and the
; resulting carry is added to %val32. All arguments are `inreg` (uniform), so
; the expected codegen below uses scalar s_add_u32/s_addc_u32.
192+ define %struct.uint96 @s_add64_32 (i64 inreg %val64A , i64 inreg %val64B , i32 inreg %val32 ) {
193+ ; CHECK-LABEL: s_add64_32:
194+ ; CHECK: ; %bb.0:
195+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196+ ; CHECK-NEXT: s_add_u32 s4, s16, s18
197+ ; CHECK-NEXT: v_mov_b32_e32 v0, s16
198+ ; CHECK-NEXT: s_addc_u32 s5, s17, s19
199+ ; CHECK-NEXT: v_mov_b32_e32 v1, s17
200+ ; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1]
201+ ; CHECK-NEXT: v_mov_b32_e32 v0, s4
202+ ; CHECK-NEXT: s_cmp_lg_u64 vcc, 0
203+ ; CHECK-NEXT: s_addc_u32 s6, s20, 0
204+ ; CHECK-NEXT: v_mov_b32_e32 v1, s5
205+ ; CHECK-NEXT: v_mov_b32_e32 v2, s6
206+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
207+ %sum64 = add i64 %val64A , %val64B
208+ %obit = icmp ult i64 %sum64 , %val64A
209+ %obit32 = zext i1 %obit to i32
210+ %sum32 = add i32 %val32 , %obit32
211+ %.fca.0.insert = insertvalue %struct.uint96 poison, i64 %sum64 , 0
212+ %.fca.1.insert = insertvalue %struct.uint96 %.fca.0.insert , i32 %sum32 , 1
213+ ret %struct.uint96 %.fca.1.insert
214+ }
215+
; Vector (<2 x i64>) unsigned add-with-overflow on uniform (inreg) operands:
; stores the wrapped sums to %ptrval and returns the per-lane overflow bits
; sign-extended to <2 x i64> (all-ones on overflow, zero otherwise).
216+ define <2 x i64 > @s_uadd_v2i64 (<2 x i64 > inreg %val0 , <2 x i64 > inreg %val1 , ptr %ptrval ) {
217+ ; CHECK-LABEL: s_uadd_v2i64:
218+ ; CHECK: ; %bb.0:
219+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
220+ ; CHECK-NEXT: s_add_u32 s4, s18, s22
221+ ; CHECK-NEXT: s_addc_u32 s5, s19, s23
222+ ; CHECK-NEXT: s_add_u32 s6, s16, s20
223+ ; CHECK-NEXT: v_mov_b32_e32 v2, s16
224+ ; CHECK-NEXT: s_addc_u32 s7, s17, s21
225+ ; CHECK-NEXT: v_mov_b32_e32 v3, s17
226+ ; CHECK-NEXT: v_mov_b32_e32 v8, s18
227+ ; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
228+ ; CHECK-NEXT: v_mov_b32_e32 v9, s19
229+ ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
230+ ; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9]
231+ ; CHECK-NEXT: v_mov_b32_e32 v4, s6
232+ ; CHECK-NEXT: v_mov_b32_e32 v5, s7
233+ ; CHECK-NEXT: v_mov_b32_e32 v6, s4
234+ ; CHECK-NEXT: v_mov_b32_e32 v7, s5
235+ ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
236+ ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
237+ ; CHECK-NEXT: v_mov_b32_e32 v0, v2
238+ ; CHECK-NEXT: v_mov_b32_e32 v1, v2
239+ ; CHECK-NEXT: v_mov_b32_e32 v2, v3
240+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
241+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
242+ %pair = call {<2 x i64 >, <2 x i1 >} @llvm.uadd.with.overflow.v2i64 (<2 x i64 > %val0 , <2 x i64 > %val1 )
243+ %val = extractvalue {<2 x i64 >, <2 x i1 >} %pair , 0
244+ %obit = extractvalue {<2 x i64 >, <2 x i1 >} %pair , 1
245+ %res = sext <2 x i1 > %obit to <2 x i64 >
246+ store <2 x i64 > %val , ptr %ptrval
247+ ret <2 x i64 > %res
248+ }
249+
; Vector (<2 x i64>) unsigned sub-with-overflow on uniform (inreg) operands:
; stores the wrapped differences to %ptrval and returns the per-lane borrow
; bits sign-extended to <2 x i64>. Expected codegen detects borrow with
; v_cmp_gt_u64 (result > minuend implies underflow).
250+ define <2 x i64 > @s_usub_v2i64 (<2 x i64 > inreg %val0 , <2 x i64 > inreg %val1 , ptr %ptrval ) {
251+ ; CHECK-LABEL: s_usub_v2i64:
252+ ; CHECK: ; %bb.0:
253+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254+ ; CHECK-NEXT: s_sub_u32 s4, s18, s22
255+ ; CHECK-NEXT: s_subb_u32 s5, s19, s23
256+ ; CHECK-NEXT: s_sub_u32 s6, s16, s20
257+ ; CHECK-NEXT: v_mov_b32_e32 v2, s16
258+ ; CHECK-NEXT: s_subb_u32 s7, s17, s21
259+ ; CHECK-NEXT: v_mov_b32_e32 v3, s17
260+ ; CHECK-NEXT: v_mov_b32_e32 v8, s18
261+ ; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
262+ ; CHECK-NEXT: v_mov_b32_e32 v9, s19
263+ ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
264+ ; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[8:9]
265+ ; CHECK-NEXT: v_mov_b32_e32 v4, s6
266+ ; CHECK-NEXT: v_mov_b32_e32 v5, s7
267+ ; CHECK-NEXT: v_mov_b32_e32 v6, s4
268+ ; CHECK-NEXT: v_mov_b32_e32 v7, s5
269+ ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
270+ ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
271+ ; CHECK-NEXT: v_mov_b32_e32 v0, v2
272+ ; CHECK-NEXT: v_mov_b32_e32 v1, v2
273+ ; CHECK-NEXT: v_mov_b32_e32 v2, v3
274+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
275+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
276+ %pair = call {<2 x i64 >, <2 x i1 >} @llvm.usub.with.overflow.v2i64 (<2 x i64 > %val0 , <2 x i64 > %val1 )
277+ %val = extractvalue {<2 x i64 >, <2 x i1 >} %pair , 0
278+ %obit = extractvalue {<2 x i64 >, <2 x i1 >} %pair , 1
279+ %res = sext <2 x i1 > %obit to <2 x i64 >
280+ store <2 x i64 > %val , ptr %ptrval
281+ ret <2 x i64 > %res
282+ }
283+
; Scalar i64 unsigned add-with-overflow on uniform (inreg) operands:
; stores the wrapped sum to %ptrval and returns the overflow bit
; sign-extended to i64 (-1 on overflow, 0 otherwise).
284+ define i64 @s_uadd_i64 (i64 inreg %val0 , i64 inreg %val1 , ptr %ptrval ) {
285+ ; CHECK-LABEL: s_uadd_i64:
286+ ; CHECK: ; %bb.0:
287+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
288+ ; CHECK-NEXT: s_add_u32 s4, s16, s18
289+ ; CHECK-NEXT: v_mov_b32_e32 v2, s16
290+ ; CHECK-NEXT: s_addc_u32 s5, s17, s19
291+ ; CHECK-NEXT: v_mov_b32_e32 v3, s17
292+ ; CHECK-NEXT: v_mov_b32_e32 v4, s4
293+ ; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[2:3]
294+ ; CHECK-NEXT: v_mov_b32_e32 v5, s5
295+ ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
296+ ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
297+ ; CHECK-NEXT: v_mov_b32_e32 v1, v0
298+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
299+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
300+ %pair = call {i64 , i1 } @llvm.uadd.with.overflow.i64 (i64 %val0 , i64 %val1 )
301+ %val = extractvalue {i64 , i1 } %pair , 0
302+ %obit = extractvalue {i64 , i1 } %pair , 1
303+ %res = sext i1 %obit to i64
304+ store i64 %val , ptr %ptrval
305+ ret i64 %res
306+ }
307+
; uadd.with.overflow with constant +1 on a uniform operand (%val1 is unused,
; kept so all tests share a signature). Expected codegen folds the overflow
; check to "sum == 0" (s_cmp_eq_u64), since x+1 overflows iff it wraps to 0.
308+ define i64 @s_uadd_p1 (i64 inreg %val0 , i64 inreg %val1 , ptr %ptrval ) {
309+ ; CHECK-LABEL: s_uadd_p1:
310+ ; CHECK: ; %bb.0:
311+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
312+ ; CHECK-NEXT: s_add_u32 s4, s16, 1
313+ ; CHECK-NEXT: s_addc_u32 s5, s17, 0
314+ ; CHECK-NEXT: s_cmp_eq_u64 s[4:5], 0
315+ ; CHECK-NEXT: v_mov_b32_e32 v2, s4
316+ ; CHECK-NEXT: v_mov_b32_e32 v3, s5
317+ ; CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0
318+ ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
319+ ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
320+ ; CHECK-NEXT: v_mov_b32_e32 v1, v0
321+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
322+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
323+ %pair = call {i64 , i1 } @llvm.uadd.with.overflow.i64 (i64 %val0 , i64 1 )
324+ %val = extractvalue {i64 , i1 } %pair , 0
325+ %obit = extractvalue {i64 , i1 } %pair , 1
326+ %res = sext i1 %obit to i64
327+ store i64 %val , ptr %ptrval
328+ ret i64 %res
329+ }
330+
; uadd.with.overflow with constant -1 on a uniform operand (%val1 unused).
; Expected codegen folds the overflow check to "input != 0"
; (s_cmp_lg_u64 s[16:17], 0), since x + (-1) overflows for every x != 0.
331+ define i64 @s_uadd_n1 (i64 inreg %val0 , i64 inreg %val1 , ptr %ptrval ) {
332+ ; CHECK-LABEL: s_uadd_n1:
333+ ; CHECK: ; %bb.0:
334+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335+ ; CHECK-NEXT: s_add_u32 s4, s16, -1
336+ ; CHECK-NEXT: s_addc_u32 s5, s17, -1
337+ ; CHECK-NEXT: s_cmp_lg_u64 s[16:17], 0
338+ ; CHECK-NEXT: v_mov_b32_e32 v2, s4
339+ ; CHECK-NEXT: v_mov_b32_e32 v3, s5
340+ ; CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0
341+ ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
342+ ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
343+ ; CHECK-NEXT: v_mov_b32_e32 v1, v0
344+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
345+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
346+ %pair = call {i64 , i1 } @llvm.uadd.with.overflow.i64 (i64 %val0 , i64 -1 )
347+ %val = extractvalue {i64 , i1 } %pair , 0
348+ %obit = extractvalue {i64 , i1 } %pair , 1
349+ %res = sext i1 %obit to i64
350+ store i64 %val , ptr %ptrval
351+ ret i64 %res
352+ }
353+
; usub.with.overflow with constant 1 on a uniform operand (%val1 unused).
; Expected codegen lowers the subtract as an add of -1 and detects the
; borrow with v_cmp_gt_u64 (result > original operand implies underflow).
354+ define i64 @s_usub_p1 (i64 inreg %val0 , i64 inreg %val1 , ptr %ptrval ) {
355+ ; CHECK-LABEL: s_usub_p1:
356+ ; CHECK: ; %bb.0:
357+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
358+ ; CHECK-NEXT: s_add_u32 s4, s16, -1
359+ ; CHECK-NEXT: v_mov_b32_e32 v2, s16
360+ ; CHECK-NEXT: s_addc_u32 s5, s17, -1
361+ ; CHECK-NEXT: v_mov_b32_e32 v3, s17
362+ ; CHECK-NEXT: v_mov_b32_e32 v4, s4
363+ ; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[2:3]
364+ ; CHECK-NEXT: v_mov_b32_e32 v5, s5
365+ ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
366+ ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
367+ ; CHECK-NEXT: v_mov_b32_e32 v1, v0
368+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
369+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
370+ %pair = call {i64 , i1 } @llvm.usub.with.overflow.i64 (i64 %val0 , i64 1 )
371+ %val = extractvalue {i64 , i1 } %pair , 0
372+ %obit = extractvalue {i64 , i1 } %pair , 1
373+ %res = sext i1 %obit to i64
374+ store i64 %val , ptr %ptrval
375+ ret i64 %res
376+ }
377+
; usub.with.overflow with constant -1 on a uniform operand (%val1 unused).
; Expected codegen lowers x - (-1) as an add of +1 and detects the borrow
; with v_cmp_gt_u64 against the original operand.
378+ define i64 @s_usub_n1 (i64 inreg %val0 , i64 inreg %val1 , ptr %ptrval ) {
379+ ; CHECK-LABEL: s_usub_n1:
380+ ; CHECK: ; %bb.0:
381+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
382+ ; CHECK-NEXT: s_add_u32 s4, s16, 1
383+ ; CHECK-NEXT: v_mov_b32_e32 v2, s16
384+ ; CHECK-NEXT: s_addc_u32 s5, s17, 0
385+ ; CHECK-NEXT: v_mov_b32_e32 v3, s17
386+ ; CHECK-NEXT: v_mov_b32_e32 v4, s4
387+ ; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[2:3]
388+ ; CHECK-NEXT: v_mov_b32_e32 v5, s5
389+ ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
390+ ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
391+ ; CHECK-NEXT: v_mov_b32_e32 v1, v0
392+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
393+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
394+ %pair = call {i64 , i1 } @llvm.usub.with.overflow.i64 (i64 %val0 , i64 -1 )
395+ %val = extractvalue {i64 , i1 } %pair , 0
396+ %obit = extractvalue {i64 , i1 } %pair , 1
397+ %res = sext i1 %obit to i64
398+ store i64 %val , ptr %ptrval
399+ ret i64 %res
400+ }
0 commit comments