Skip to content

Commit b79a665

Browse files
authored
[AMDGPU] Remove leftover implicit operands from SI_SPILL/SI_RESTORE. (#168546)
Remove leftover implicit operands from SI_SPILL/SI_RESTORE.

Signed-off-by: John Lu <[email protected]>
1 parent b11b7b3 commit b79a665

13 files changed

+747
-750
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2094,11 +2094,11 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
20942094
break;
20952095

20962096
case AMDGPU::SI_SPILL_S32_TO_VGPR:
2097-
MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
2097+
mutateAndCleanupImplicit(MI, get(AMDGPU::V_WRITELANE_B32));
20982098
break;
20992099

21002100
case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2101-
MI.setDesc(get(AMDGPU::V_READLANE_B32));
2101+
mutateAndCleanupImplicit(MI, get(AMDGPU::V_READLANE_B32));
21022102
break;
21032103
case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
21042104
Register Dst = MI.getOperand(0).getReg();

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll

Lines changed: 636 additions & 636 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll

Lines changed: 31 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -66747,11 +66747,9 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6674766747
; SI-NEXT: v_writelane_b32 v21, s17, 13
6674866748
; SI-NEXT: .LBB97_3: ; %end
6674966749
; SI-NEXT: v_readlane_b32 s18, v21, 0
66750-
; SI-NEXT: v_readlane_b32 s19, v21, 1
66750+
; SI-NEXT: s_and_b32 s16, s40, 0xff
6675166751
; SI-NEXT: s_lshl_b32 s17, s18, 8
6675266752
; SI-NEXT: v_readlane_b32 s18, v21, 2
66753-
; SI-NEXT: s_and_b32 s16, s40, 0xff
66754-
; SI-NEXT: v_readlane_b32 s19, v21, 3
6675566753
; SI-NEXT: s_or_b32 s16, s16, s17
6675666754
; SI-NEXT: s_and_b32 s17, s18, 0xff
6675766755
; SI-NEXT: v_readlane_b32 s18, v21, 4
@@ -66773,9 +66771,8 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6677366771
; SI-NEXT: v_mov_b32_e32 v2, s16
6677466772
; SI-NEXT: v_readlane_b32 s16, v21, 6
6677566773
; SI-NEXT: s_and_b32 s14, s14, 0xff
66776-
; SI-NEXT: v_readlane_b32 s17, v21, 7
6677766774
; SI-NEXT: s_lshl_b32 s16, s16, 8
66778-
; SI-NEXT: v_readlane_b32 s19, v21, 5
66775+
; SI-NEXT: v_readlane_b32 s17, v21, 7
6677966776
; SI-NEXT: s_or_b32 s14, s14, s16
6678066777
; SI-NEXT: v_readlane_b32 s16, v21, 8
6678166778
; SI-NEXT: v_readlane_b32 s17, v21, 9
@@ -66807,8 +66804,8 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6680766804
; SI-NEXT: v_mov_b32_e32 v2, s14
6680866805
; SI-NEXT: v_readlane_b32 s14, v21, 12
6680966806
; SI-NEXT: s_and_b32 s10, s10, 0xff
66810-
; SI-NEXT: v_readlane_b32 s15, v21, 13
6681166807
; SI-NEXT: s_lshl_b32 s14, s14, 8
66808+
; SI-NEXT: v_readlane_b32 s15, v21, 13
6681266809
; SI-NEXT: s_or_b32 s10, s10, s14
6681366810
; SI-NEXT: v_readlane_b32 s14, v21, 14
6681466811
; SI-NEXT: v_readlane_b32 s15, v21, 15
@@ -66959,10 +66956,13 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6695966956
; SI-NEXT: s_and_b32 s5, s89, 0xff
6696066957
; SI-NEXT: s_lshl_b32 s5, s5, 16
6696166958
; SI-NEXT: s_lshl_b32 s6, s91, 24
66959+
; SI-NEXT: v_readlane_b32 s19, v21, 1
6696266960
; SI-NEXT: s_and_b32 s4, s4, 0xffff
6696366961
; SI-NEXT: s_or_b32 s5, s6, s5
66962+
; SI-NEXT: v_readlane_b32 s19, v21, 3
6696466963
; SI-NEXT: v_add_i32_e32 v1, vcc, 56, v0
6696566964
; SI-NEXT: s_or_b32 s4, s4, s5
66965+
; SI-NEXT: v_readlane_b32 s19, v21, 5
6696666966
; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
6696766967
; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0
6696866968
; SI-NEXT: v_mov_b32_e32 v1, s4
@@ -67017,6 +67017,28 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6701767017
; SI-NEXT: v_writelane_b32 v21, s4, 0
6701867018
; SI-NEXT: v_writelane_b32 v21, s5, 1
6701967019
; SI-NEXT: ; implicit-def: $sgpr4
67020+
; SI-NEXT: v_writelane_b32 v21, s4, 2
67021+
; SI-NEXT: v_writelane_b32 v21, s5, 3
67022+
; SI-NEXT: ; implicit-def: $sgpr4
67023+
; SI-NEXT: v_writelane_b32 v21, s4, 4
67024+
; SI-NEXT: v_writelane_b32 v21, s5, 5
67025+
; SI-NEXT: ; implicit-def: $sgpr4
67026+
; SI-NEXT: v_writelane_b32 v21, s4, 6
67027+
; SI-NEXT: v_writelane_b32 v21, s5, 7
67028+
; SI-NEXT: ; implicit-def: $sgpr4
67029+
; SI-NEXT: v_writelane_b32 v21, s4, 8
67030+
; SI-NEXT: v_writelane_b32 v21, s5, 9
67031+
; SI-NEXT: ; implicit-def: $sgpr4
67032+
; SI-NEXT: v_writelane_b32 v21, s4, 10
67033+
; SI-NEXT: v_writelane_b32 v21, s5, 11
67034+
; SI-NEXT: ; implicit-def: $sgpr4
67035+
; SI-NEXT: v_writelane_b32 v21, s4, 12
67036+
; SI-NEXT: v_writelane_b32 v21, s5, 13
67037+
; SI-NEXT: ; implicit-def: $sgpr4
67038+
; SI-NEXT: v_writelane_b32 v21, s4, 14
67039+
; SI-NEXT: v_writelane_b32 v21, s5, 15
67040+
; SI-NEXT: ; implicit-def: $sgpr4
67041+
; SI-NEXT: v_writelane_b32 v21, s4, 16
6702067042
; SI-NEXT: ; implicit-def: $sgpr40
6702167043
; SI-NEXT: ; implicit-def: $sgpr60
6702267044
; SI-NEXT: ; implicit-def: $sgpr74
@@ -67044,6 +67066,7 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6704467066
; SI-NEXT: ; implicit-def: $sgpr79
6704567067
; SI-NEXT: ; implicit-def: $sgpr89
6704667068
; SI-NEXT: ; implicit-def: $sgpr91
67069+
; SI-NEXT: v_writelane_b32 v21, s5, 17
6704767070
; SI-NEXT: ; implicit-def: $sgpr42
6704867071
; SI-NEXT: ; implicit-def: $sgpr66
6704967072
; SI-NEXT: ; implicit-def: $sgpr64
@@ -67060,33 +67083,10 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6706067083
; SI-NEXT: ; implicit-def: $sgpr30
6706167084
; SI-NEXT: ; implicit-def: $sgpr94
6706267085
; SI-NEXT: ; implicit-def: $sgpr92
67086+
; SI-NEXT: ; implicit-def: $sgpr4
6706367087
; SI-NEXT: ; implicit-def: $sgpr90
6706467088
; SI-NEXT: ; implicit-def: $sgpr88
6706567089
; SI-NEXT: ; implicit-def: $sgpr78
67066-
; SI-NEXT: v_writelane_b32 v21, s4, 2
67067-
; SI-NEXT: v_writelane_b32 v21, s5, 3
67068-
; SI-NEXT: ; implicit-def: $sgpr4
67069-
; SI-NEXT: v_writelane_b32 v21, s4, 4
67070-
; SI-NEXT: v_writelane_b32 v21, s5, 5
67071-
; SI-NEXT: ; implicit-def: $sgpr4
67072-
; SI-NEXT: v_writelane_b32 v21, s4, 6
67073-
; SI-NEXT: v_writelane_b32 v21, s5, 7
67074-
; SI-NEXT: ; implicit-def: $sgpr4
67075-
; SI-NEXT: v_writelane_b32 v21, s4, 8
67076-
; SI-NEXT: v_writelane_b32 v21, s5, 9
67077-
; SI-NEXT: ; implicit-def: $sgpr4
67078-
; SI-NEXT: v_writelane_b32 v21, s4, 10
67079-
; SI-NEXT: v_writelane_b32 v21, s5, 11
67080-
; SI-NEXT: ; implicit-def: $sgpr4
67081-
; SI-NEXT: v_writelane_b32 v21, s4, 12
67082-
; SI-NEXT: v_writelane_b32 v21, s5, 13
67083-
; SI-NEXT: ; implicit-def: $sgpr4
67084-
; SI-NEXT: v_writelane_b32 v21, s4, 14
67085-
; SI-NEXT: v_writelane_b32 v21, s5, 15
67086-
; SI-NEXT: ; implicit-def: $sgpr4
67087-
; SI-NEXT: v_writelane_b32 v21, s4, 16
67088-
; SI-NEXT: v_writelane_b32 v21, s5, 17
67089-
; SI-NEXT: ; implicit-def: $sgpr4
6709067090
; SI-NEXT: s_branch .LBB97_2
6709167091
;
6709267092
; VI-LABEL: bitcast_v32i16_to_v64i8_scalar:
@@ -88410,8 +88410,8 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a,
8841088410
; SI-NEXT: s_lshr_b64 s[4:5], s[74:75], 24
8841188411
; SI-NEXT: s_waitcnt expcnt(0)
8841288412
; SI-NEXT: v_writelane_b32 v41, s4, 0
88413-
; SI-NEXT: v_writelane_b32 v41, s5, 1
8841488413
; SI-NEXT: v_readfirstlane_b32 s4, v6
88414+
; SI-NEXT: v_writelane_b32 v41, s5, 1
8841588415
; SI-NEXT: s_lshr_b32 s5, s4, 16
8841688416
; SI-NEXT: v_readfirstlane_b32 s4, v7
8841788417
; SI-NEXT: s_lshr_b64 s[60:61], s[4:5], 16

llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -150,8 +150,8 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
150150
; GCN-NEXT: ;;#ASMEND
151151
; GCN-NEXT: ;;#ASMSTART
152152
; GCN-NEXT: ;;#ASMEND
153-
; GCN-NEXT: v_readlane_b32 s0, v0, 0
154153
; GCN-NEXT: v_mov_b32_e32 v1, vcc_lo
154+
; GCN-NEXT: v_readlane_b32 s0, v0, 0
155155
; GCN-NEXT: v_readlane_b32 s1, v0, 1
156156
; GCN-NEXT: v_mov_b32_e32 v2, 0
157157
; GCN-NEXT: ;;#ASMSTART

llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -65,12 +65,12 @@ body: |
6565
; CHECK: S_NOP 0, implicit-def $exec
6666
; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
6767
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
68-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
69-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
70-
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
68+
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
69+
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0
70+
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
7171
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
7272
; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1, implicit killed renamable $sgpr0_sgpr1
73-
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
73+
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
7474
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
7575
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
7676
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
@@ -141,12 +141,12 @@ body: |
141141
; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec
142142
; CHECK: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec
143143
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
144-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
145-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
146-
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
144+
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
145+
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0
146+
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
147147
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
148148
; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1
149-
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
149+
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
150150
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
151151
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
152152
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec

llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -41,10 +41,10 @@ define void @main(i1 %arg) #0 {
4141
; CHECK-NEXT: s_load_dwordx16 s[8:23], s[68:69], 0x130
4242
; CHECK-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
4343
; CHECK-NEXT: v_writelane_b32 v6, s70, 20
44+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
4445
; CHECK-NEXT: v_writelane_b32 v6, s71, 21
4546
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
4647
; CHECK-NEXT: v_mov_b32_e32 v1, s4
47-
; CHECK-NEXT: v_mov_b32_e32 v2, 0
4848
; CHECK-NEXT: v_writelane_b32 v7, s8, 0
4949
; CHECK-NEXT: v_writelane_b32 v7, s9, 1
5050
; CHECK-NEXT: v_writelane_b32 v7, s10, 2
@@ -76,28 +76,28 @@ define void @main(i1 %arg) #0 {
7676
; CHECK-NEXT: v_writelane_b32 v7, s64, 28
7777
; CHECK-NEXT: v_writelane_b32 v7, s65, 29
7878
; CHECK-NEXT: v_writelane_b32 v7, s66, 30
79+
; CHECK-NEXT: v_writelane_b32 v7, s67, 31
7980
; CHECK-NEXT: s_load_dwordx16 s[8:23], s[68:69], 0x1f0
8081
; CHECK-NEXT: s_load_dwordx16 s[36:51], s[68:69], 0x2f0
8182
; CHECK-NEXT: s_mov_b32 s69, s68
8283
; CHECK-NEXT: s_mov_b32 s70, s68
8384
; CHECK-NEXT: s_mov_b32 s71, s68
84-
; CHECK-NEXT: v_writelane_b32 v7, s67, 31
85-
; CHECK-NEXT: image_sample_lz v1, v[1:2], s[60:67], s[68:71] dmask:0x1
86-
; CHECK-NEXT: v_readlane_b32 s52, v7, 0
8785
; CHECK-NEXT: v_mov_b32_e32 v3, v2
86+
; CHECK-NEXT: v_readlane_b32 s52, v7, 0
8887
; CHECK-NEXT: v_readlane_b32 s53, v7, 1
8988
; CHECK-NEXT: v_readlane_b32 s54, v7, 2
9089
; CHECK-NEXT: v_readlane_b32 s55, v7, 3
9190
; CHECK-NEXT: v_readlane_b32 s56, v7, 4
9291
; CHECK-NEXT: v_readlane_b32 s57, v7, 5
9392
; CHECK-NEXT: v_readlane_b32 s58, v7, 6
9493
; CHECK-NEXT: v_readlane_b32 s59, v7, 7
94+
; CHECK-NEXT: image_sample_lz v1, v[1:2], s[60:67], s[68:71] dmask:0x1
9595
; CHECK-NEXT: v_and_b32_e32 v5, 1, v0
9696
; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v5
9797
; CHECK-NEXT: v_readlane_b32 s60, v7, 8
9898
; CHECK-NEXT: v_readlane_b32 s61, v7, 9
99-
; CHECK-NEXT: v_readlane_b32 s62, v7, 10
10099
; CHECK-NEXT: image_sample_lz v4, v[2:3], s[52:59], s[68:71] dmask:0x1
100+
; CHECK-NEXT: v_readlane_b32 s62, v7, 10
101101
; CHECK-NEXT: v_readlane_b32 s63, v7, 11
102102
; CHECK-NEXT: v_readlane_b32 s64, v7, 12
103103
; CHECK-NEXT: v_readlane_b32 s65, v7, 13
@@ -109,7 +109,6 @@ define void @main(i1 %arg) #0 {
109109
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
110110
; CHECK-NEXT: s_cbranch_execz .LBB0_3
111111
; CHECK-NEXT: ; %bb.1: ; %bb48
112-
; CHECK-NEXT: v_readlane_b32 s52, v7, 16
113112
; CHECK-NEXT: v_readlane_b32 s60, v7, 24
114113
; CHECK-NEXT: v_readlane_b32 s61, v7, 25
115114
; CHECK-NEXT: v_readlane_b32 s62, v7, 26
@@ -120,10 +119,11 @@ define void @main(i1 %arg) #0 {
120119
; CHECK-NEXT: v_readlane_b32 s67, v7, 31
121120
; CHECK-NEXT: v_mov_b32_e32 v1, v2
122121
; CHECK-NEXT: s_and_b64 vcc, exec, -1
122+
; CHECK-NEXT: v_readlane_b32 s52, v7, 16
123123
; CHECK-NEXT: v_readlane_b32 s53, v7, 17
124124
; CHECK-NEXT: v_readlane_b32 s54, v7, 18
125-
; CHECK-NEXT: v_readlane_b32 s55, v7, 19
126125
; CHECK-NEXT: image_sample_lz v3, v[2:3], s[60:67], s[68:71] dmask:0x1
126+
; CHECK-NEXT: v_readlane_b32 s55, v7, 19
127127
; CHECK-NEXT: v_readlane_b32 s56, v7, 20
128128
; CHECK-NEXT: v_readlane_b32 s57, v7, 21
129129
; CHECK-NEXT: v_readlane_b32 s58, v7, 22
@@ -152,27 +152,25 @@ define void @main(i1 %arg) #0 {
152152
; CHECK-NEXT: s_mov_b32 s16, 0
153153
; CHECK-NEXT: s_mov_b32 s17, s16
154154
; CHECK-NEXT: v_mov_b32_e32 v0, s16
155-
; CHECK-NEXT: v_readlane_b32 s44, v7, 16
155+
; CHECK-NEXT: v_readlane_b32 s52, v7, 24
156+
; CHECK-NEXT: v_readlane_b32 s53, v7, 25
157+
; CHECK-NEXT: v_readlane_b32 s54, v7, 26
158+
; CHECK-NEXT: v_readlane_b32 s55, v7, 27
159+
; CHECK-NEXT: v_readlane_b32 s56, v7, 28
160+
; CHECK-NEXT: v_readlane_b32 s57, v7, 29
161+
; CHECK-NEXT: v_readlane_b32 s58, v7, 30
162+
; CHECK-NEXT: v_readlane_b32 s59, v7, 31
156163
; CHECK-NEXT: v_mov_b32_e32 v1, s17
157164
; CHECK-NEXT: s_mov_b32 s18, s16
158165
; CHECK-NEXT: s_mov_b32 s19, s16
166+
; CHECK-NEXT: v_readlane_b32 s44, v7, 16
159167
; CHECK-NEXT: v_readlane_b32 s45, v7, 17
160168
; CHECK-NEXT: v_readlane_b32 s46, v7, 18
161169
; CHECK-NEXT: v_readlane_b32 s47, v7, 19
162170
; CHECK-NEXT: v_readlane_b32 s48, v7, 20
163171
; CHECK-NEXT: v_readlane_b32 s49, v7, 21
164172
; CHECK-NEXT: v_readlane_b32 s50, v7, 22
165173
; CHECK-NEXT: v_readlane_b32 s51, v7, 23
166-
; CHECK-NEXT: v_readlane_b32 s52, v7, 24
167-
; CHECK-NEXT: v_readlane_b32 s53, v7, 25
168-
; CHECK-NEXT: v_readlane_b32 s54, v7, 26
169-
; CHECK-NEXT: v_readlane_b32 s55, v7, 27
170-
; CHECK-NEXT: v_readlane_b32 s56, v7, 28
171-
; CHECK-NEXT: v_readlane_b32 s57, v7, 29
172-
; CHECK-NEXT: v_readlane_b32 s58, v7, 30
173-
; CHECK-NEXT: v_readlane_b32 s59, v7, 31
174-
; CHECK-NEXT: image_sample_lz v2, v[0:1], s[44:51], s[16:19] dmask:0x1
175-
; CHECK-NEXT: v_readlane_b32 s44, v7, 0
176174
; CHECK-NEXT: v_readlane_b32 s52, v7, 8
177175
; CHECK-NEXT: v_readlane_b32 s53, v7, 9
178176
; CHECK-NEXT: v_readlane_b32 s54, v7, 10
@@ -181,12 +179,14 @@ define void @main(i1 %arg) #0 {
181179
; CHECK-NEXT: v_readlane_b32 s57, v7, 13
182180
; CHECK-NEXT: v_readlane_b32 s58, v7, 14
183181
; CHECK-NEXT: v_readlane_b32 s59, v7, 15
182+
; CHECK-NEXT: image_sample_lz v2, v[0:1], s[44:51], s[16:19] dmask:0x1
184183
; CHECK-NEXT: v_mov_b32_e32 v3, 0
185184
; CHECK-NEXT: v_mov_b32_e32 v4, v3
185+
; CHECK-NEXT: v_readlane_b32 s44, v7, 0
186186
; CHECK-NEXT: v_readlane_b32 s45, v7, 1
187+
; CHECK-NEXT: image_sample_lz v0, v[0:1], s[52:59], s[24:27] dmask:0x1
187188
; CHECK-NEXT: v_readlane_b32 s46, v7, 2
188189
; CHECK-NEXT: v_readlane_b32 s47, v7, 3
189-
; CHECK-NEXT: image_sample_lz v0, v[0:1], s[52:59], s[24:27] dmask:0x1
190190
; CHECK-NEXT: v_readlane_b32 s48, v7, 4
191191
; CHECK-NEXT: v_readlane_b32 s49, v7, 5
192192
; CHECK-NEXT: v_readlane_b32 s50, v7, 6

llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1826,10 +1826,10 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
18261826
; GCN-NEXT: s_or_b32 s0, s0, s5
18271827
; GCN-NEXT: s_and_b32 s0, s0, 0xffff
18281828
; GCN-NEXT: s_or_b32 s0, s0, s4
1829-
; GCN-NEXT: v_mov_b32_e32 v0, s0
18301829
; GCN-NEXT: v_mov_b32_e32 v1, s1
1831-
; GCN-NEXT: v_readlane_b32 s0, v6, 0
18321830
; GCN-NEXT: v_readlane_b32 s1, v6, 1
1831+
; GCN-NEXT: v_mov_b32_e32 v0, s0
1832+
; GCN-NEXT: v_readlane_b32 s0, v6, 0
18331833
; GCN-NEXT: v_mov_b32_e32 v5, s1
18341834
; GCN-NEXT: v_mov_b32_e32 v2, s2
18351835
; GCN-NEXT: v_mov_b32_e32 v3, s3

llvm/test/CodeGen/AMDGPU/load-constant-i1.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10279,11 +10279,11 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
1027910279
; GFX8-NEXT: v_writelane_b32 v62, s3, 5
1028010280
; GFX8-NEXT: v_readlane_b32 s2, v62, 2
1028110281
; GFX8-NEXT: v_readlane_b32 s3, v62, 3
10282+
; GFX8-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
1028210283
; GFX8-NEXT: v_mov_b32_e32 v35, s49
1028310284
; GFX8-NEXT: s_bfe_i64 s[48:49], s[4:5], 0x10000
1028410285
; GFX8-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
1028510286
; GFX8-NEXT: v_readlane_b32 s2, v62, 0
10286-
; GFX8-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
1028710287
; GFX8-NEXT: v_readlane_b32 s3, v62, 1
1028810288
; GFX8-NEXT: v_mov_b32_e32 v5, s75
1028910289
; GFX8-NEXT: v_mov_b32_e32 v13, s73
@@ -10577,8 +10577,8 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
1057710577
; GFX8-NEXT: v_mov_b32_e32 v2, s34
1057810578
; GFX8-NEXT: v_mov_b32_e32 v3, s35
1057910579
; GFX8-NEXT: v_mov_b32_e32 v4, s2
10580-
; GFX8-NEXT: v_readlane_b32 s2, v62, 4
1058110580
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
10581+
; GFX8-NEXT: v_readlane_b32 s2, v62, 4
1058210582
; GFX8-NEXT: v_readlane_b32 s3, v62, 5
1058310583
; GFX8-NEXT: v_mov_b32_e32 v5, s1
1058410584
; GFX8-NEXT: v_mov_b32_e32 v0, s30

0 commit comments

Comments
 (0)