Skip to content

Commit f9fc4a8

Browse files
committed
Update tests
Signed-off-by: John Lu <[email protected]>
1 parent 36eeb25 commit f9fc4a8

File tree

5 files changed

+82
-79
lines changed

5 files changed

+82
-79
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll

Lines changed: 44 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -1330,13 +1330,7 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1
13301330
define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [8 x i32], ptr addrspace(1) %in, [8 x i32], ptr addrspace(1) %dummy) {
13311331
; GFX7-LABEL: test_div_fmas_f32_i1_phi_vcc:
13321332
; GFX7: ; %bb.0: ; %entry
1333-
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa
1334-
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1335-
; GFX7-NEXT: v_mov_b32_e32 v2, 0
13361333
; GFX7-NEXT: s_mov_b32 s2, 0
1337-
; GFX7-NEXT: s_mov_b32 s3, 0xf000
1338-
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1339-
; GFX7-NEXT: buffer_load_dwordx3 v[1:3], v[1:2], s[0:3], 0 addr64
13401334
; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0
13411335
; GFX7-NEXT: s_mov_b64 vcc, 0
13421336
; GFX7-NEXT: s_and_saveexec_b64 s[6:7], s[0:1]
@@ -1355,24 +1349,22 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
13551349
; GFX7-NEXT: s_or_b64 vcc, s[8:9], s[0:1]
13561350
; GFX7-NEXT: .LBB13_2: ; %exit
13571351
; GFX7-NEXT: s_or_b64 exec, exec, s[6:7]
1352+
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa
1353+
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1354+
; GFX7-NEXT: v_mov_b32_e32 v1, 0
1355+
; GFX7-NEXT: s_mov_b32 s3, 0xf000
1356+
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1357+
; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[0:3], 0 addr64
13581358
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1359-
; GFX7-NEXT: s_waitcnt vmcnt(0)
1360-
; GFX7-NEXT: v_div_fmas_f32 v0, v1, v2, v3
13611359
; GFX7-NEXT: s_mov_b32 s2, -1
1360+
; GFX7-NEXT: s_waitcnt vmcnt(0)
1361+
; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2
13621362
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
13631363
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
13641364
; GFX7-NEXT: s_endpgm
13651365
;
13661366
; GFX8-LABEL: test_div_fmas_f32_i1_phi_vcc:
13671367
; GFX8: ; %bb.0: ; %entry
1368-
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28
1369-
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0
1370-
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1371-
; GFX8-NEXT: v_mov_b32_e32 v2, s1
1372-
; GFX8-NEXT: v_mov_b32_e32 v1, s0
1373-
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
1374-
; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
1375-
; GFX8-NEXT: flat_load_dwordx3 v[1:3], v[1:2]
13761368
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0
13771369
; GFX8-NEXT: s_mov_b64 vcc, 0
13781370
; GFX8-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
@@ -1391,25 +1383,29 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
13911383
; GFX8-NEXT: s_or_b64 vcc, s[6:7], s[0:1]
13921384
; GFX8-NEXT: .LBB13_2: ; %exit
13931385
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
1394-
; GFX8-NEXT: s_waitcnt vmcnt(0)
1395-
; GFX8-NEXT: v_div_fmas_f32 v2, v1, v2, v3
1386+
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28
1387+
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1388+
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1389+
; GFX8-NEXT: v_mov_b32_e32 v0, s0
1390+
; GFX8-NEXT: v_mov_b32_e32 v1, s1
1391+
; GFX8-NEXT: v_add_u32_e64 v0, s[0:1], v0, v2
1392+
; GFX8-NEXT: v_addc_u32_e64 v1, s[0:1], 0, v1, s[0:1]
1393+
; GFX8-NEXT: flat_load_dwordx3 v[0:2], v[0:1]
13961394
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
13971395
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
13981396
; GFX8-NEXT: s_add_u32 s0, s0, 8
13991397
; GFX8-NEXT: s_addc_u32 s1, s1, 0
1398+
; GFX8-NEXT: s_waitcnt vmcnt(0)
1399+
; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2
14001400
; GFX8-NEXT: v_mov_b32_e32 v0, s0
14011401
; GFX8-NEXT: v_mov_b32_e32 v1, s1
14021402
; GFX8-NEXT: flat_store_dword v[0:1], v2
14031403
; GFX8-NEXT: s_endpgm
14041404
;
14051405
; GFX10_W32-LABEL: test_div_fmas_f32_i1_phi_vcc:
14061406
; GFX10_W32: ; %bb.0: ; %entry
1407-
; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28
1408-
; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1409-
; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0
1410-
; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1411-
; GFX10_W32-NEXT: global_load_dwordx3 v[1:3], v1, s[0:1]
14121407
; GFX10_W32-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
1408+
; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0
14131409
; GFX10_W32-NEXT: s_and_saveexec_b32 s1, s0
14141410
; GFX10_W32-NEXT: s_cbranch_execz .LBB13_2
14151411
; GFX10_W32-NEXT: ; %bb.1: ; %bb
@@ -1426,22 +1422,23 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
14261422
; GFX10_W32-NEXT: s_or_b32 vcc_lo, s2, s0
14271423
; GFX10_W32-NEXT: .LBB13_2: ; %exit
14281424
; GFX10_W32-NEXT: s_or_b32 exec_lo, exec_lo, s1
1425+
; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28
1426+
; GFX10_W32-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1427+
; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1428+
; GFX10_W32-NEXT: global_load_dwordx3 v[0:2], v0, s[0:1]
1429+
; GFX10_W32-NEXT: s_waitcnt_depctr 0xffe3
14291430
; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
14301431
; GFX10_W32-NEXT: s_waitcnt vmcnt(0)
1431-
; GFX10_W32-NEXT: v_div_fmas_f32 v0, v1, v2, v3
1432+
; GFX10_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2
14321433
; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
14331434
; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
14341435
; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] offset:8
14351436
; GFX10_W32-NEXT: s_endpgm
14361437
;
14371438
; GFX10_W64-LABEL: test_div_fmas_f32_i1_phi_vcc:
14381439
; GFX10_W64: ; %bb.0: ; %entry
1439-
; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28
1440-
; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1441-
; GFX10_W64-NEXT: s_mov_b64 vcc, 0
1442-
; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1443-
; GFX10_W64-NEXT: global_load_dwordx3 v[1:3], v1, s[0:1]
14441440
; GFX10_W64-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0
1441+
; GFX10_W64-NEXT: s_mov_b64 vcc, 0
14451442
; GFX10_W64-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
14461443
; GFX10_W64-NEXT: s_cbranch_execz .LBB13_2
14471444
; GFX10_W64-NEXT: ; %bb.1: ; %bb
@@ -1458,24 +1455,25 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
14581455
; GFX10_W64-NEXT: s_or_b64 vcc, s[6:7], s[0:1]
14591456
; GFX10_W64-NEXT: .LBB13_2: ; %exit
14601457
; GFX10_W64-NEXT: s_or_b64 exec, exec, s[2:3]
1458+
; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28
1459+
; GFX10_W64-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1460+
; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1461+
; GFX10_W64-NEXT: global_load_dwordx3 v[0:2], v0, s[0:1]
1462+
; GFX10_W64-NEXT: s_waitcnt_depctr 0xffe3
14611463
; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
14621464
; GFX10_W64-NEXT: s_waitcnt vmcnt(0)
1463-
; GFX10_W64-NEXT: v_div_fmas_f32 v0, v1, v2, v3
1465+
; GFX10_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2
14641466
; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
14651467
; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
14661468
; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] offset:8
14671469
; GFX10_W64-NEXT: s_endpgm
14681470
;
14691471
; GFX11_W32-LABEL: test_div_fmas_f32_i1_phi_vcc:
14701472
; GFX11_W32: ; %bb.0: ; %entry
1471-
; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x28
1472-
; GFX11_W32-NEXT: v_and_b32_e32 v3, 0x3ff, v0
1473+
; GFX11_W32-NEXT: v_and_b32_e32 v0, 0x3ff, v0
14731474
; GFX11_W32-NEXT: s_mov_b32 vcc_lo, 0
1474-
; GFX11_W32-NEXT: v_lshlrev_b32_e32 v0, 2, v3
1475-
; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1476-
; GFX11_W32-NEXT: global_load_b96 v[0:2], v0, s[0:1]
14771475
; GFX11_W32-NEXT: s_mov_b32 s1, exec_lo
1478-
; GFX11_W32-NEXT: v_cmpx_eq_u32_e32 0, v3
1476+
; GFX11_W32-NEXT: v_cmpx_eq_u32_e32 0, v0
14791477
; GFX11_W32-NEXT: s_cbranch_execz .LBB13_2
14801478
; GFX11_W32-NEXT: ; %bb.1: ; %bb
14811479
; GFX11_W32-NEXT: s_load_b64 s[2:3], s[4:5], 0x50
@@ -1491,6 +1489,10 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
14911489
; GFX11_W32-NEXT: s_or_b32 vcc_lo, s2, s0
14921490
; GFX11_W32-NEXT: .LBB13_2: ; %exit
14931491
; GFX11_W32-NEXT: s_or_b32 exec_lo, exec_lo, s1
1492+
; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x28
1493+
; GFX11_W32-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1494+
; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1495+
; GFX11_W32-NEXT: global_load_b96 v[0:2], v0, s[0:1]
14941496
; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14951497
; GFX11_W32-NEXT: s_waitcnt vmcnt(0)
14961498
; GFX11_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2
@@ -1501,14 +1503,10 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
15011503
;
15021504
; GFX11_W64-LABEL: test_div_fmas_f32_i1_phi_vcc:
15031505
; GFX11_W64: ; %bb.0: ; %entry
1504-
; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x28
1505-
; GFX11_W64-NEXT: v_and_b32_e32 v3, 0x3ff, v0
1506+
; GFX11_W64-NEXT: v_and_b32_e32 v0, 0x3ff, v0
15061507
; GFX11_W64-NEXT: s_mov_b64 vcc, 0
15071508
; GFX11_W64-NEXT: s_mov_b64 s[2:3], exec
1508-
; GFX11_W64-NEXT: v_lshlrev_b32_e32 v0, 2, v3
1509-
; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1510-
; GFX11_W64-NEXT: global_load_b96 v[0:2], v0, s[0:1]
1511-
; GFX11_W64-NEXT: v_cmpx_eq_u32_e32 0, v3
1509+
; GFX11_W64-NEXT: v_cmpx_eq_u32_e32 0, v0
15121510
; GFX11_W64-NEXT: s_cbranch_execz .LBB13_2
15131511
; GFX11_W64-NEXT: ; %bb.1: ; %bb
15141512
; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x50
@@ -1524,6 +1522,10 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
15241522
; GFX11_W64-NEXT: s_or_b64 vcc, s[6:7], s[0:1]
15251523
; GFX11_W64-NEXT: .LBB13_2: ; %exit
15261524
; GFX11_W64-NEXT: s_or_b64 exec, exec, s[2:3]
1525+
; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x28
1526+
; GFX11_W64-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1527+
; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1528+
; GFX11_W64-NEXT: global_load_b96 v[0:2], v0, s[0:1]
15271529
; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15281530
; GFX11_W64-NEXT: s_waitcnt vmcnt(0)
15291531
; GFX11_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2

llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll

Lines changed: 13 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -877,14 +877,11 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
877877
; CHECK-NEXT: ; =>This Loop Header: Depth=1
878878
; CHECK-NEXT: ; Child Loop BB1_3 Depth 2
879879
; CHECK-NEXT: ; Child Loop BB1_8 Depth 2
880-
; CHECK-NEXT: v_add_nc_u32_e32 v0, s4, v44
881880
; CHECK-NEXT: s_lshl_b32 s5, s4, 5
882881
; CHECK-NEXT: s_add_i32 s53, s4, 1
883882
; CHECK-NEXT: s_add_i32 s6, s4, 5
884-
; CHECK-NEXT: v_or3_b32 v47, s5, v42, s53
885-
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
886-
; CHECK-NEXT: ds_read_u8 v46, v0
887-
; CHECK-NEXT: v_mov_b32_e32 v56, s53
883+
; CHECK-NEXT: v_or3_b32 v46, s5, v42, s53
884+
; CHECK-NEXT: v_mov_b32_e32 v47, s53
888885
; CHECK-NEXT: s_mov_b32 s5, exec_lo
889886
; CHECK-NEXT: v_cmpx_lt_u32_e64 s6, v41
890887
; CHECK-NEXT: s_cbranch_execz .LBB1_5
@@ -898,46 +895,48 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
898895
; CHECK-NEXT: s_add_i32 s7, s7, 4
899896
; CHECK-NEXT: v_add_nc_u32_e32 v43, 1, v43
900897
; CHECK-NEXT: s_add_i32 s8, s4, s7
901-
; CHECK-NEXT: v_add_nc_u32_e32 v0, s7, v47
898+
; CHECK-NEXT: v_add_nc_u32_e32 v0, s7, v46
902899
; CHECK-NEXT: s_add_i32 s9, s8, 5
903900
; CHECK-NEXT: s_add_i32 s8, s8, 1
904901
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s9, v41
905-
; CHECK-NEXT: v_mov_b32_e32 v56, s8
902+
; CHECK-NEXT: v_mov_b32_e32 v47, s8
906903
; CHECK-NEXT: s_or_b32 s6, vcc_lo, s6
907904
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s6
908905
; CHECK-NEXT: s_cbranch_execnz .LBB1_3
909906
; CHECK-NEXT: ; %bb.4: ; %Flow3
910907
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
911908
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6
912-
; CHECK-NEXT: v_mov_b32_e32 v47, v0
909+
; CHECK-NEXT: v_mov_b32_e32 v46, v0
913910
; CHECK-NEXT: .LBB1_5: ; %Flow4
914911
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
915912
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5
916913
; CHECK-NEXT: s_mov_b32 s54, exec_lo
917-
; CHECK-NEXT: v_cmpx_lt_u32_e64 v56, v41
914+
; CHECK-NEXT: v_cmpx_lt_u32_e64 v47, v41
918915
; CHECK-NEXT: s_cbranch_execz .LBB1_11
919916
; CHECK-NEXT: ; %bb.6: ; %.103.preheader
920917
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
918+
; CHECK-NEXT: v_add_nc_u32_e32 v0, s4, v44
921919
; CHECK-NEXT: s_mov_b32 s55, 0
920+
; CHECK-NEXT: ds_read_u8 v56, v0
922921
; CHECK-NEXT: s_inst_prefetch 0x1
923922
; CHECK-NEXT: s_branch .LBB1_8
924923
; CHECK-NEXT: .p2align 6
925924
; CHECK-NEXT: .LBB1_7: ; %.114
926925
; CHECK-NEXT: ; in Loop: Header=BB1_8 Depth=2
927926
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s64
928-
; CHECK-NEXT: v_add_nc_u32_e32 v56, 1, v56
929927
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
930-
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v56, v41
928+
; CHECK-NEXT: v_add_nc_u32_e32 v46, 1, v46
929+
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v47, v41
931930
; CHECK-NEXT: s_or_b32 s55, vcc_lo, s55
932931
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s55
933932
; CHECK-NEXT: s_cbranch_execz .LBB1_10
934933
; CHECK-NEXT: .LBB1_8: ; %.103
935934
; CHECK-NEXT: ; Parent Loop BB1_1 Depth=1
936935
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
937-
; CHECK-NEXT: v_add_nc_u32_e32 v0, v44, v56
936+
; CHECK-NEXT: v_add_nc_u32_e32 v0, v44, v47
938937
; CHECK-NEXT: ds_read_u8 v0, v0
939938
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
940-
; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v46, v0 src0_sel:BYTE_0 src1_sel:DWORD
939+
; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD
941940
; CHECK-NEXT: s_and_saveexec_b32 s64, s4
942941
; CHECK-NEXT: s_cbranch_execz .LBB1_7
943942
; CHECK-NEXT: ; %bb.9: ; %.110
@@ -958,7 +957,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
958957
; CHECK-NEXT: v_add_nc_u32_e32 v43, 1, v43
959958
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
960959
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
961-
; CHECK-NEXT: ds_write_b32 v0, v47
960+
; CHECK-NEXT: ds_write_b32 v0, v46
962961
; CHECK-NEXT: s_branch .LBB1_7
963962
; CHECK-NEXT: .LBB1_10: ; %Flow
964963
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1

llvm/test/CodeGen/AMDGPU/set-wave-priority.ll

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,13 +72,14 @@ entry:
7272

7373
a:
7474
%v2 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 1, i32 0)
75+
%v3 = fadd <2 x float> %v1, %v2
7576
%v20 = extractelement <2 x float> %v2, i32 0
7677
%v21 = extractelement <2 x float> %v2, i32 1
7778
%cond2 = fcmp ult float %v20, %v21
7879
br i1 %cond2, label %b, label %c
7980

8081
b:
81-
ret <2 x float> %v2
82+
ret <2 x float> %v3
8283

8384
c:
8485
%v4 = fadd <2 x float> %v1, %v1

llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,8 @@ bb:
4141
%tmp20 = extractelement <4 x float> %tmp18, i32 1
4242
%tmp21 = extractelement <4 x float> %tmp18, i32 2
4343
%tmp22 = extractelement <4 x float> %tmp18, i32 3
44-
%tmp23 = bitcast float %tmp14 to i32
44+
%tmp23 = fadd float %tmp14, %tmp22
45+
%tmp24 = bitcast float %tmp23 to i32
4546
br label %bb24
4647

4748
bb24: ; preds = %bb157, %bb
@@ -218,7 +219,7 @@ bb156: ; preds = %bb24
218219
bb157: ; preds = %bb24
219220
%tmp158 = bitcast float %tmp107 to i32
220221
%tmp159 = bitcast float %tmp107 to i32
221-
%tmp160 = add i32 %tmp23, %tmp159
222+
%tmp160 = add i32 %tmp24, %tmp159
222223
%tmp161 = bitcast i32 %tmp160 to float
223224
%tmp162 = insertelement <128 x float> poison, float %tmp103, i32 0
224225
%tmp163 = insertelement <128 x float> %tmp162, float %tmp102, i32 1

llvm/test/CodeGen/AMDGPU/wave32.ll

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1266,53 +1266,53 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, p
12661266
; GFX1032-NEXT: s_clause 0x1
12671267
; GFX1032-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
12681268
; GFX1032-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x34
1269-
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 2, v0
12701269
; GFX1032-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
12711270
; GFX1032-NEXT: s_mov_b32 vcc_lo, 0
1272-
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
1273-
; GFX1032-NEXT: global_load_dwordx3 v[1:3], v1, s[10:11]
12741271
; GFX1032-NEXT: s_and_saveexec_b32 s1, s0
12751272
; GFX1032-NEXT: s_cbranch_execz .LBB22_2
12761273
; GFX1032-NEXT: ; %bb.1: ; %bb
1277-
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
1278-
; GFX1032-NEXT: global_load_dword v0, v0, s[2:3] glc dlc
1274+
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
1275+
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
1276+
; GFX1032-NEXT: global_load_dword v1, v1, s[2:3] glc dlc
12791277
; GFX1032-NEXT: s_waitcnt vmcnt(0)
1280-
; GFX1032-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
1278+
; GFX1032-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
12811279
; GFX1032-NEXT: s_and_b32 vcc_lo, vcc_lo, exec_lo
12821280
; GFX1032-NEXT: .LBB22_2: ; %exit
1283-
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
12841281
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s1
1285-
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
1282+
; GFX1032-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1283+
; GFX1032-NEXT: v_mov_b32_e32 v3, 0
1284+
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
1285+
; GFX1032-NEXT: global_load_dwordx3 v[0:2], v0, s[10:11]
12861286
; GFX1032-NEXT: s_waitcnt vmcnt(0)
1287-
; GFX1032-NEXT: v_div_fmas_f32 v1, v1, v2, v3
1288-
; GFX1032-NEXT: global_store_dword v0, v1, s[8:9] offset:8
1287+
; GFX1032-NEXT: v_div_fmas_f32 v0, v0, v1, v2
1288+
; GFX1032-NEXT: global_store_dword v3, v0, s[8:9] offset:8
12891289
; GFX1032-NEXT: s_endpgm
12901290
;
12911291
; GFX1064-LABEL: test_div_fmas_f32_i1_phi_vcc:
12921292
; GFX1064: ; %bb.0: ; %entry
12931293
; GFX1064-NEXT: s_clause 0x1
12941294
; GFX1064-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
12951295
; GFX1064-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
1296-
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 2, v0
12971296
; GFX1064-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0
12981297
; GFX1064-NEXT: s_mov_b64 vcc, 0
1299-
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
1300-
; GFX1064-NEXT: global_load_dwordx3 v[1:3], v1, s[10:11]
13011298
; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
13021299
; GFX1064-NEXT: s_cbranch_execz .LBB22_2
13031300
; GFX1064-NEXT: ; %bb.1: ; %bb
1304-
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
1305-
; GFX1064-NEXT: global_load_dword v0, v0, s[6:7] glc dlc
1301+
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
1302+
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
1303+
; GFX1064-NEXT: global_load_dword v1, v1, s[6:7] glc dlc
13061304
; GFX1064-NEXT: s_waitcnt vmcnt(0)
1307-
; GFX1064-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
1305+
; GFX1064-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
13081306
; GFX1064-NEXT: s_and_b64 vcc, vcc, exec
13091307
; GFX1064-NEXT: .LBB22_2: ; %exit
1310-
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
13111308
; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3]
1312-
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
1309+
; GFX1064-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1310+
; GFX1064-NEXT: v_mov_b32_e32 v3, 0
1311+
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
1312+
; GFX1064-NEXT: global_load_dwordx3 v[0:2], v0, s[10:11]
13131313
; GFX1064-NEXT: s_waitcnt vmcnt(0)
1314-
; GFX1064-NEXT: v_div_fmas_f32 v1, v1, v2, v3
1315-
; GFX1064-NEXT: global_store_dword v0, v1, s[8:9] offset:8
1314+
; GFX1064-NEXT: v_div_fmas_f32 v0, v0, v1, v2
1315+
; GFX1064-NEXT: global_store_dword v3, v0, s[8:9] offset:8
13161316
; GFX1064-NEXT: s_endpgm
13171317
entry:
13181318
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

0 commit comments

Comments
 (0)