@@ -1931,7 +1931,6 @@ define <20 x i32> @bitcast_v40i16_to_v20i32(<40 x i16> %a, i32 %b) {
1931
1931
; GFX9-LABEL: bitcast_v40i16_to_v20i32:
1932
1932
; GFX9: ; %bb.0:
1933
1933
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1934
- ; GFX9-NEXT: v_mov_b32_e32 v32, v19
1935
1934
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
1936
1935
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
1937
1936
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
@@ -1948,6 +1947,7 @@ define <20 x i32> @bitcast_v40i16_to_v20i32(<40 x i16> %a, i32 %b) {
1948
1947
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
1949
1948
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1950
1949
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
1950
+ ; GFX9-NEXT: v_mov_b32_e32 v32, v19
1951
1951
; GFX9-NEXT: v_mov_b32_e32 v33, v18
1952
1952
; GFX9-NEXT: v_mov_b32_e32 v43, v0
1953
1953
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32
@@ -3584,7 +3584,6 @@ define <20 x i32> @bitcast_v40f16_to_v20i32(<40 x half> %a, i32 %b) {
3584
3584
; GFX9-LABEL: bitcast_v40f16_to_v20i32:
3585
3585
; GFX9: ; %bb.0:
3586
3586
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3587
- ; GFX9-NEXT: v_mov_b32_e32 v32, v19
3588
3587
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
3589
3588
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
3590
3589
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
@@ -3601,6 +3600,7 @@ define <20 x i32> @bitcast_v40f16_to_v20i32(<40 x half> %a, i32 %b) {
3601
3600
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
3602
3601
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
3603
3602
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
3603
+ ; GFX9-NEXT: v_mov_b32_e32 v32, v19
3604
3604
; GFX9-NEXT: v_mov_b32_e32 v33, v18
3605
3605
; GFX9-NEXT: v_mov_b32_e32 v43, v0
3606
3606
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32
@@ -5510,7 +5510,6 @@ define <20 x float> @bitcast_v40i16_to_v20f32(<40 x i16> %a, i32 %b) {
5510
5510
; GFX9-LABEL: bitcast_v40i16_to_v20f32:
5511
5511
; GFX9: ; %bb.0:
5512
5512
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5513
- ; GFX9-NEXT: v_mov_b32_e32 v32, v19
5514
5513
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
5515
5514
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
5516
5515
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
@@ -5527,6 +5526,7 @@ define <20 x float> @bitcast_v40i16_to_v20f32(<40 x i16> %a, i32 %b) {
5527
5526
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
5528
5527
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
5529
5528
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
5529
+ ; GFX9-NEXT: v_mov_b32_e32 v32, v19
5530
5530
; GFX9-NEXT: v_mov_b32_e32 v33, v18
5531
5531
; GFX9-NEXT: v_mov_b32_e32 v43, v0
5532
5532
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32
@@ -7153,7 +7153,6 @@ define <20 x float> @bitcast_v40f16_to_v20f32(<40 x half> %a, i32 %b) {
7153
7153
; GFX9-LABEL: bitcast_v40f16_to_v20f32:
7154
7154
; GFX9: ; %bb.0:
7155
7155
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7156
- ; GFX9-NEXT: v_mov_b32_e32 v32, v19
7157
7156
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
7158
7157
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
7159
7158
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
@@ -7170,6 +7169,7 @@ define <20 x float> @bitcast_v40f16_to_v20f32(<40 x half> %a, i32 %b) {
7170
7169
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
7171
7170
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
7172
7171
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
7172
+ ; GFX9-NEXT: v_mov_b32_e32 v32, v19
7173
7173
; GFX9-NEXT: v_mov_b32_e32 v33, v18
7174
7174
; GFX9-NEXT: v_mov_b32_e32 v43, v0
7175
7175
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32
@@ -8814,7 +8814,6 @@ define <10 x i64> @bitcast_v40i16_to_v10i64(<40 x i16> %a, i32 %b) {
8814
8814
; GFX9-LABEL: bitcast_v40i16_to_v10i64:
8815
8815
; GFX9: ; %bb.0:
8816
8816
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8817
- ; GFX9-NEXT: v_mov_b32_e32 v32, v19
8818
8817
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
8819
8818
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
8820
8819
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
@@ -8831,6 +8830,7 @@ define <10 x i64> @bitcast_v40i16_to_v10i64(<40 x i16> %a, i32 %b) {
8831
8830
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
8832
8831
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
8833
8832
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
8833
+ ; GFX9-NEXT: v_mov_b32_e32 v32, v19
8834
8834
; GFX9-NEXT: v_mov_b32_e32 v33, v18
8835
8835
; GFX9-NEXT: v_mov_b32_e32 v43, v0
8836
8836
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32
@@ -10472,7 +10472,6 @@ define <10 x i64> @bitcast_v40f16_to_v10i64(<40 x half> %a, i32 %b) {
10472
10472
; GFX9-LABEL: bitcast_v40f16_to_v10i64:
10473
10473
; GFX9: ; %bb.0:
10474
10474
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10475
- ; GFX9-NEXT: v_mov_b32_e32 v32, v19
10476
10475
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
10477
10476
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
10478
10477
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
@@ -10489,6 +10488,7 @@ define <10 x i64> @bitcast_v40f16_to_v10i64(<40 x half> %a, i32 %b) {
10489
10488
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
10490
10489
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
10491
10490
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
10491
+ ; GFX9-NEXT: v_mov_b32_e32 v32, v19
10492
10492
; GFX9-NEXT: v_mov_b32_e32 v33, v18
10493
10493
; GFX9-NEXT: v_mov_b32_e32 v43, v0
10494
10494
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32
@@ -11823,7 +11823,6 @@ define <10 x double> @bitcast_v40i16_to_v10f64(<40 x i16> %a, i32 %b) {
11823
11823
; GFX9-LABEL: bitcast_v40i16_to_v10f64:
11824
11824
; GFX9: ; %bb.0:
11825
11825
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11826
- ; GFX9-NEXT: v_mov_b32_e32 v32, v19
11827
11826
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
11828
11827
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
11829
11828
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
@@ -11840,6 +11839,7 @@ define <10 x double> @bitcast_v40i16_to_v10f64(<40 x i16> %a, i32 %b) {
11840
11839
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
11841
11840
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
11842
11841
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
11842
+ ; GFX9-NEXT: v_mov_b32_e32 v32, v19
11843
11843
; GFX9-NEXT: v_mov_b32_e32 v33, v18
11844
11844
; GFX9-NEXT: v_mov_b32_e32 v43, v0
11845
11845
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32
@@ -13426,7 +13426,6 @@ define <10 x double> @bitcast_v40f16_to_v10f64(<40 x half> %a, i32 %b) {
13426
13426
; GFX9-LABEL: bitcast_v40f16_to_v10f64:
13427
13427
; GFX9: ; %bb.0:
13428
13428
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13429
- ; GFX9-NEXT: v_mov_b32_e32 v32, v19
13430
13429
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
13431
13430
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
13432
13431
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
@@ -13443,6 +13442,7 @@ define <10 x double> @bitcast_v40f16_to_v10f64(<40 x half> %a, i32 %b) {
13443
13442
; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
13444
13443
; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
13445
13444
; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
13445
+ ; GFX9-NEXT: v_mov_b32_e32 v32, v19
13446
13446
; GFX9-NEXT: v_mov_b32_e32 v33, v18
13447
13447
; GFX9-NEXT: v_mov_b32_e32 v43, v0
13448
13448
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32
0 commit comments