Skip to content

Commit 4511c35

Browse files
authored
Revert "[AMDGPU] Remove leftover implicit operands from SI_SPILL/SI_RESTORE." (#169068)
PR causes build failures with expensive checks enabled. Reverts #168546.
1 parent 39d4dfb commit 4511c35

13 files changed

+750
-747
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2094,11 +2094,11 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
20942094
break;
20952095

20962096
case AMDGPU::SI_SPILL_S32_TO_VGPR:
2097-
mutateAndCleanupImplicit(MI, get(AMDGPU::V_WRITELANE_B32));
2097+
MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
20982098
break;
20992099

21002100
case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2101-
mutateAndCleanupImplicit(MI, get(AMDGPU::V_READLANE_B32));
2101+
MI.setDesc(get(AMDGPU::V_READLANE_B32));
21022102
break;
21032103
case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
21042104
Register Dst = MI.getOperand(0).getReg();

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll

Lines changed: 636 additions & 636 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -66739,9 +66739,11 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6673966739
; SI-NEXT: v_writelane_b32 v21, s17, 13
6674066740
; SI-NEXT: .LBB97_3: ; %end
6674166741
; SI-NEXT: v_readlane_b32 s18, v21, 0
66742-
; SI-NEXT: s_and_b32 s16, s40, 0xff
66742+
; SI-NEXT: v_readlane_b32 s19, v21, 1
6674366743
; SI-NEXT: s_lshl_b32 s17, s18, 8
6674466744
; SI-NEXT: v_readlane_b32 s18, v21, 2
66745+
; SI-NEXT: s_and_b32 s16, s40, 0xff
66746+
; SI-NEXT: v_readlane_b32 s19, v21, 3
6674566747
; SI-NEXT: s_or_b32 s16, s16, s17
6674666748
; SI-NEXT: s_and_b32 s17, s18, 0xff
6674766749
; SI-NEXT: v_readlane_b32 s18, v21, 4
@@ -66763,8 +66765,9 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6676366765
; SI-NEXT: v_mov_b32_e32 v2, s16
6676466766
; SI-NEXT: v_readlane_b32 s16, v21, 6
6676566767
; SI-NEXT: s_and_b32 s14, s14, 0xff
66766-
; SI-NEXT: s_lshl_b32 s16, s16, 8
6676766768
; SI-NEXT: v_readlane_b32 s17, v21, 7
66769+
; SI-NEXT: s_lshl_b32 s16, s16, 8
66770+
; SI-NEXT: v_readlane_b32 s19, v21, 5
6676866771
; SI-NEXT: s_or_b32 s14, s14, s16
6676966772
; SI-NEXT: v_readlane_b32 s16, v21, 8
6677066773
; SI-NEXT: v_readlane_b32 s17, v21, 9
@@ -66796,8 +66799,8 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6679666799
; SI-NEXT: v_mov_b32_e32 v2, s14
6679766800
; SI-NEXT: v_readlane_b32 s14, v21, 12
6679866801
; SI-NEXT: s_and_b32 s10, s10, 0xff
66799-
; SI-NEXT: s_lshl_b32 s14, s14, 8
6680066802
; SI-NEXT: v_readlane_b32 s15, v21, 13
66803+
; SI-NEXT: s_lshl_b32 s14, s14, 8
6680166804
; SI-NEXT: s_or_b32 s10, s10, s14
6680266805
; SI-NEXT: v_readlane_b32 s14, v21, 14
6680366806
; SI-NEXT: v_readlane_b32 s15, v21, 15
@@ -66948,13 +66951,10 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6694866951
; SI-NEXT: s_and_b32 s5, s89, 0xff
6694966952
; SI-NEXT: s_lshl_b32 s5, s5, 16
6695066953
; SI-NEXT: s_lshl_b32 s6, s91, 24
66951-
; SI-NEXT: v_readlane_b32 s19, v21, 1
6695266954
; SI-NEXT: s_and_b32 s4, s4, 0xffff
6695366955
; SI-NEXT: s_or_b32 s5, s6, s5
66954-
; SI-NEXT: v_readlane_b32 s19, v21, 3
6695566956
; SI-NEXT: v_add_i32_e32 v1, vcc, 56, v0
6695666957
; SI-NEXT: s_or_b32 s4, s4, s5
66957-
; SI-NEXT: v_readlane_b32 s19, v21, 5
6695866958
; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
6695966959
; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0
6696066960
; SI-NEXT: v_mov_b32_e32 v1, s4
@@ -67009,28 +67009,6 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6700967009
; SI-NEXT: v_writelane_b32 v21, s4, 0
6701067010
; SI-NEXT: v_writelane_b32 v21, s5, 1
6701167011
; SI-NEXT: ; implicit-def: $sgpr4
67012-
; SI-NEXT: v_writelane_b32 v21, s4, 2
67013-
; SI-NEXT: v_writelane_b32 v21, s5, 3
67014-
; SI-NEXT: ; implicit-def: $sgpr4
67015-
; SI-NEXT: v_writelane_b32 v21, s4, 4
67016-
; SI-NEXT: v_writelane_b32 v21, s5, 5
67017-
; SI-NEXT: ; implicit-def: $sgpr4
67018-
; SI-NEXT: v_writelane_b32 v21, s4, 6
67019-
; SI-NEXT: v_writelane_b32 v21, s5, 7
67020-
; SI-NEXT: ; implicit-def: $sgpr4
67021-
; SI-NEXT: v_writelane_b32 v21, s4, 8
67022-
; SI-NEXT: v_writelane_b32 v21, s5, 9
67023-
; SI-NEXT: ; implicit-def: $sgpr4
67024-
; SI-NEXT: v_writelane_b32 v21, s4, 10
67025-
; SI-NEXT: v_writelane_b32 v21, s5, 11
67026-
; SI-NEXT: ; implicit-def: $sgpr4
67027-
; SI-NEXT: v_writelane_b32 v21, s4, 12
67028-
; SI-NEXT: v_writelane_b32 v21, s5, 13
67029-
; SI-NEXT: ; implicit-def: $sgpr4
67030-
; SI-NEXT: v_writelane_b32 v21, s4, 14
67031-
; SI-NEXT: v_writelane_b32 v21, s5, 15
67032-
; SI-NEXT: ; implicit-def: $sgpr4
67033-
; SI-NEXT: v_writelane_b32 v21, s4, 16
6703467012
; SI-NEXT: ; implicit-def: $sgpr40
6703567013
; SI-NEXT: ; implicit-def: $sgpr60
6703667014
; SI-NEXT: ; implicit-def: $sgpr74
@@ -67058,7 +67036,6 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6705867036
; SI-NEXT: ; implicit-def: $sgpr79
6705967037
; SI-NEXT: ; implicit-def: $sgpr89
6706067038
; SI-NEXT: ; implicit-def: $sgpr91
67061-
; SI-NEXT: v_writelane_b32 v21, s5, 17
6706267039
; SI-NEXT: ; implicit-def: $sgpr42
6706367040
; SI-NEXT: ; implicit-def: $sgpr66
6706467041
; SI-NEXT: ; implicit-def: $sgpr64
@@ -67075,10 +67052,33 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
6707567052
; SI-NEXT: ; implicit-def: $sgpr30
6707667053
; SI-NEXT: ; implicit-def: $sgpr94
6707767054
; SI-NEXT: ; implicit-def: $sgpr92
67078-
; SI-NEXT: ; implicit-def: $sgpr4
6707967055
; SI-NEXT: ; implicit-def: $sgpr90
6708067056
; SI-NEXT: ; implicit-def: $sgpr88
6708167057
; SI-NEXT: ; implicit-def: $sgpr78
67058+
; SI-NEXT: v_writelane_b32 v21, s4, 2
67059+
; SI-NEXT: v_writelane_b32 v21, s5, 3
67060+
; SI-NEXT: ; implicit-def: $sgpr4
67061+
; SI-NEXT: v_writelane_b32 v21, s4, 4
67062+
; SI-NEXT: v_writelane_b32 v21, s5, 5
67063+
; SI-NEXT: ; implicit-def: $sgpr4
67064+
; SI-NEXT: v_writelane_b32 v21, s4, 6
67065+
; SI-NEXT: v_writelane_b32 v21, s5, 7
67066+
; SI-NEXT: ; implicit-def: $sgpr4
67067+
; SI-NEXT: v_writelane_b32 v21, s4, 8
67068+
; SI-NEXT: v_writelane_b32 v21, s5, 9
67069+
; SI-NEXT: ; implicit-def: $sgpr4
67070+
; SI-NEXT: v_writelane_b32 v21, s4, 10
67071+
; SI-NEXT: v_writelane_b32 v21, s5, 11
67072+
; SI-NEXT: ; implicit-def: $sgpr4
67073+
; SI-NEXT: v_writelane_b32 v21, s4, 12
67074+
; SI-NEXT: v_writelane_b32 v21, s5, 13
67075+
; SI-NEXT: ; implicit-def: $sgpr4
67076+
; SI-NEXT: v_writelane_b32 v21, s4, 14
67077+
; SI-NEXT: v_writelane_b32 v21, s5, 15
67078+
; SI-NEXT: ; implicit-def: $sgpr4
67079+
; SI-NEXT: v_writelane_b32 v21, s4, 16
67080+
; SI-NEXT: v_writelane_b32 v21, s5, 17
67081+
; SI-NEXT: ; implicit-def: $sgpr4
6708267082
; SI-NEXT: s_branch .LBB97_2
6708367083
;
6708467084
; VI-LABEL: bitcast_v32i16_to_v64i8_scalar:
@@ -88402,8 +88402,8 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a,
8840288402
; SI-NEXT: s_lshr_b64 s[4:5], s[74:75], 24
8840388403
; SI-NEXT: s_waitcnt expcnt(0)
8840488404
; SI-NEXT: v_writelane_b32 v41, s4, 0
88405-
; SI-NEXT: v_readfirstlane_b32 s4, v6
8840688405
; SI-NEXT: v_writelane_b32 v41, s5, 1
88406+
; SI-NEXT: v_readfirstlane_b32 s4, v6
8840788407
; SI-NEXT: s_lshr_b32 s5, s4, 16
8840888408
; SI-NEXT: v_readfirstlane_b32 s4, v7
8840988409
; SI-NEXT: s_lshr_b64 s[60:61], s[4:5], 16

llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,8 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
150150
; GCN-NEXT: ;;#ASMEND
151151
; GCN-NEXT: ;;#ASMSTART
152152
; GCN-NEXT: ;;#ASMEND
153-
; GCN-NEXT: v_mov_b32_e32 v1, vcc_lo
154153
; GCN-NEXT: v_readlane_b32 s0, v0, 0
154+
; GCN-NEXT: v_mov_b32_e32 v1, vcc_lo
155155
; GCN-NEXT: v_readlane_b32 s1, v0, 1
156156
; GCN-NEXT: v_mov_b32_e32 v2, 0
157157
; GCN-NEXT: ;;#ASMSTART

llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,12 @@ body: |
6565
; CHECK: S_NOP 0, implicit-def $exec
6666
; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
6767
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
68-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
69-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0
70-
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
68+
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
69+
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
70+
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
7171
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
7272
; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1, implicit killed renamable $sgpr0_sgpr1
73-
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
73+
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
7474
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
7575
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
7676
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
@@ -141,12 +141,12 @@ body: |
141141
; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec
142142
; CHECK: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec
143143
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
144-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
145-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0
146-
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
144+
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
145+
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
146+
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
147147
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
148148
; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1
149-
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
149+
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
150150
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
151151
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
152152
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec

llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@ define void @main(i1 %arg) #0 {
4141
; CHECK-NEXT: s_load_dwordx16 s[8:23], s[68:69], 0x130
4242
; CHECK-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
4343
; CHECK-NEXT: v_writelane_b32 v6, s70, 20
44-
; CHECK-NEXT: v_mov_b32_e32 v2, 0
4544
; CHECK-NEXT: v_writelane_b32 v6, s71, 21
4645
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
4746
; CHECK-NEXT: v_mov_b32_e32 v1, s4
47+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
4848
; CHECK-NEXT: v_writelane_b32 v7, s8, 0
4949
; CHECK-NEXT: v_writelane_b32 v7, s9, 1
5050
; CHECK-NEXT: v_writelane_b32 v7, s10, 2
@@ -76,28 +76,28 @@ define void @main(i1 %arg) #0 {
7676
; CHECK-NEXT: v_writelane_b32 v7, s64, 28
7777
; CHECK-NEXT: v_writelane_b32 v7, s65, 29
7878
; CHECK-NEXT: v_writelane_b32 v7, s66, 30
79-
; CHECK-NEXT: v_writelane_b32 v7, s67, 31
8079
; CHECK-NEXT: s_load_dwordx16 s[8:23], s[68:69], 0x1f0
8180
; CHECK-NEXT: s_load_dwordx16 s[36:51], s[68:69], 0x2f0
8281
; CHECK-NEXT: s_mov_b32 s69, s68
8382
; CHECK-NEXT: s_mov_b32 s70, s68
8483
; CHECK-NEXT: s_mov_b32 s71, s68
85-
; CHECK-NEXT: v_mov_b32_e32 v3, v2
84+
; CHECK-NEXT: v_writelane_b32 v7, s67, 31
85+
; CHECK-NEXT: image_sample_lz v1, v[1:2], s[60:67], s[68:71] dmask:0x1
8686
; CHECK-NEXT: v_readlane_b32 s52, v7, 0
87+
; CHECK-NEXT: v_mov_b32_e32 v3, v2
8788
; CHECK-NEXT: v_readlane_b32 s53, v7, 1
8889
; CHECK-NEXT: v_readlane_b32 s54, v7, 2
8990
; CHECK-NEXT: v_readlane_b32 s55, v7, 3
9091
; CHECK-NEXT: v_readlane_b32 s56, v7, 4
9192
; CHECK-NEXT: v_readlane_b32 s57, v7, 5
9293
; CHECK-NEXT: v_readlane_b32 s58, v7, 6
9394
; CHECK-NEXT: v_readlane_b32 s59, v7, 7
94-
; CHECK-NEXT: image_sample_lz v1, v[1:2], s[60:67], s[68:71] dmask:0x1
9595
; CHECK-NEXT: v_and_b32_e32 v5, 1, v0
9696
; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v5
9797
; CHECK-NEXT: v_readlane_b32 s60, v7, 8
9898
; CHECK-NEXT: v_readlane_b32 s61, v7, 9
99-
; CHECK-NEXT: image_sample_lz v4, v[2:3], s[52:59], s[68:71] dmask:0x1
10099
; CHECK-NEXT: v_readlane_b32 s62, v7, 10
100+
; CHECK-NEXT: image_sample_lz v4, v[2:3], s[52:59], s[68:71] dmask:0x1
101101
; CHECK-NEXT: v_readlane_b32 s63, v7, 11
102102
; CHECK-NEXT: v_readlane_b32 s64, v7, 12
103103
; CHECK-NEXT: v_readlane_b32 s65, v7, 13
@@ -109,6 +109,7 @@ define void @main(i1 %arg) #0 {
109109
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
110110
; CHECK-NEXT: s_cbranch_execz .LBB0_3
111111
; CHECK-NEXT: ; %bb.1: ; %bb48
112+
; CHECK-NEXT: v_readlane_b32 s52, v7, 16
112113
; CHECK-NEXT: v_readlane_b32 s60, v7, 24
113114
; CHECK-NEXT: v_readlane_b32 s61, v7, 25
114115
; CHECK-NEXT: v_readlane_b32 s62, v7, 26
@@ -119,11 +120,10 @@ define void @main(i1 %arg) #0 {
119120
; CHECK-NEXT: v_readlane_b32 s67, v7, 31
120121
; CHECK-NEXT: v_mov_b32_e32 v1, v2
121122
; CHECK-NEXT: s_and_b64 vcc, exec, -1
122-
; CHECK-NEXT: v_readlane_b32 s52, v7, 16
123123
; CHECK-NEXT: v_readlane_b32 s53, v7, 17
124124
; CHECK-NEXT: v_readlane_b32 s54, v7, 18
125-
; CHECK-NEXT: image_sample_lz v3, v[2:3], s[60:67], s[68:71] dmask:0x1
126125
; CHECK-NEXT: v_readlane_b32 s55, v7, 19
126+
; CHECK-NEXT: image_sample_lz v3, v[2:3], s[60:67], s[68:71] dmask:0x1
127127
; CHECK-NEXT: v_readlane_b32 s56, v7, 20
128128
; CHECK-NEXT: v_readlane_b32 s57, v7, 21
129129
; CHECK-NEXT: v_readlane_b32 s58, v7, 22
@@ -152,25 +152,27 @@ define void @main(i1 %arg) #0 {
152152
; CHECK-NEXT: s_mov_b32 s16, 0
153153
; CHECK-NEXT: s_mov_b32 s17, s16
154154
; CHECK-NEXT: v_mov_b32_e32 v0, s16
155-
; CHECK-NEXT: v_readlane_b32 s52, v7, 24
156-
; CHECK-NEXT: v_readlane_b32 s53, v7, 25
157-
; CHECK-NEXT: v_readlane_b32 s54, v7, 26
158-
; CHECK-NEXT: v_readlane_b32 s55, v7, 27
159-
; CHECK-NEXT: v_readlane_b32 s56, v7, 28
160-
; CHECK-NEXT: v_readlane_b32 s57, v7, 29
161-
; CHECK-NEXT: v_readlane_b32 s58, v7, 30
162-
; CHECK-NEXT: v_readlane_b32 s59, v7, 31
155+
; CHECK-NEXT: v_readlane_b32 s44, v7, 16
163156
; CHECK-NEXT: v_mov_b32_e32 v1, s17
164157
; CHECK-NEXT: s_mov_b32 s18, s16
165158
; CHECK-NEXT: s_mov_b32 s19, s16
166-
; CHECK-NEXT: v_readlane_b32 s44, v7, 16
167159
; CHECK-NEXT: v_readlane_b32 s45, v7, 17
168160
; CHECK-NEXT: v_readlane_b32 s46, v7, 18
169161
; CHECK-NEXT: v_readlane_b32 s47, v7, 19
170162
; CHECK-NEXT: v_readlane_b32 s48, v7, 20
171163
; CHECK-NEXT: v_readlane_b32 s49, v7, 21
172164
; CHECK-NEXT: v_readlane_b32 s50, v7, 22
173165
; CHECK-NEXT: v_readlane_b32 s51, v7, 23
166+
; CHECK-NEXT: v_readlane_b32 s52, v7, 24
167+
; CHECK-NEXT: v_readlane_b32 s53, v7, 25
168+
; CHECK-NEXT: v_readlane_b32 s54, v7, 26
169+
; CHECK-NEXT: v_readlane_b32 s55, v7, 27
170+
; CHECK-NEXT: v_readlane_b32 s56, v7, 28
171+
; CHECK-NEXT: v_readlane_b32 s57, v7, 29
172+
; CHECK-NEXT: v_readlane_b32 s58, v7, 30
173+
; CHECK-NEXT: v_readlane_b32 s59, v7, 31
174+
; CHECK-NEXT: image_sample_lz v2, v[0:1], s[44:51], s[16:19] dmask:0x1
175+
; CHECK-NEXT: v_readlane_b32 s44, v7, 0
174176
; CHECK-NEXT: v_readlane_b32 s52, v7, 8
175177
; CHECK-NEXT: v_readlane_b32 s53, v7, 9
176178
; CHECK-NEXT: v_readlane_b32 s54, v7, 10
@@ -179,14 +181,12 @@ define void @main(i1 %arg) #0 {
179181
; CHECK-NEXT: v_readlane_b32 s57, v7, 13
180182
; CHECK-NEXT: v_readlane_b32 s58, v7, 14
181183
; CHECK-NEXT: v_readlane_b32 s59, v7, 15
182-
; CHECK-NEXT: image_sample_lz v2, v[0:1], s[44:51], s[16:19] dmask:0x1
183184
; CHECK-NEXT: v_mov_b32_e32 v3, 0
184185
; CHECK-NEXT: v_mov_b32_e32 v4, v3
185-
; CHECK-NEXT: v_readlane_b32 s44, v7, 0
186186
; CHECK-NEXT: v_readlane_b32 s45, v7, 1
187-
; CHECK-NEXT: image_sample_lz v0, v[0:1], s[52:59], s[24:27] dmask:0x1
188187
; CHECK-NEXT: v_readlane_b32 s46, v7, 2
189188
; CHECK-NEXT: v_readlane_b32 s47, v7, 3
189+
; CHECK-NEXT: image_sample_lz v0, v[0:1], s[52:59], s[24:27] dmask:0x1
190190
; CHECK-NEXT: v_readlane_b32 s48, v7, 4
191191
; CHECK-NEXT: v_readlane_b32 s49, v7, 5
192192
; CHECK-NEXT: v_readlane_b32 s50, v7, 6

llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1826,10 +1826,10 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
18261826
; GCN-NEXT: s_or_b32 s0, s0, s5
18271827
; GCN-NEXT: s_and_b32 s0, s0, 0xffff
18281828
; GCN-NEXT: s_or_b32 s0, s0, s4
1829-
; GCN-NEXT: v_mov_b32_e32 v1, s1
1830-
; GCN-NEXT: v_readlane_b32 s1, v6, 1
18311829
; GCN-NEXT: v_mov_b32_e32 v0, s0
1830+
; GCN-NEXT: v_mov_b32_e32 v1, s1
18321831
; GCN-NEXT: v_readlane_b32 s0, v6, 0
1832+
; GCN-NEXT: v_readlane_b32 s1, v6, 1
18331833
; GCN-NEXT: v_mov_b32_e32 v5, s1
18341834
; GCN-NEXT: v_mov_b32_e32 v2, s2
18351835
; GCN-NEXT: v_mov_b32_e32 v3, s3

llvm/test/CodeGen/AMDGPU/load-constant-i1.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10334,11 +10334,11 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
1033410334
; GFX8-NEXT: v_writelane_b32 v62, s3, 5
1033510335
; GFX8-NEXT: v_readlane_b32 s2, v62, 2
1033610336
; GFX8-NEXT: v_readlane_b32 s3, v62, 3
10337-
; GFX8-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
1033810337
; GFX8-NEXT: v_mov_b32_e32 v35, s49
1033910338
; GFX8-NEXT: s_bfe_i64 s[48:49], s[4:5], 0x10000
1034010339
; GFX8-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
1034110340
; GFX8-NEXT: v_readlane_b32 s2, v62, 0
10341+
; GFX8-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
1034210342
; GFX8-NEXT: v_readlane_b32 s3, v62, 1
1034310343
; GFX8-NEXT: v_mov_b32_e32 v5, s75
1034410344
; GFX8-NEXT: v_mov_b32_e32 v13, s73
@@ -10632,8 +10632,8 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
1063210632
; GFX8-NEXT: v_mov_b32_e32 v2, s34
1063310633
; GFX8-NEXT: v_mov_b32_e32 v3, s35
1063410634
; GFX8-NEXT: v_mov_b32_e32 v4, s2
10635-
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
1063610635
; GFX8-NEXT: v_readlane_b32 s2, v62, 4
10636+
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
1063710637
; GFX8-NEXT: v_readlane_b32 s3, v62, 5
1063810638
; GFX8-NEXT: v_mov_b32_e32 v5, s1
1063910639
; GFX8-NEXT: v_mov_b32_e32 v0, s30

0 commit comments

Comments
 (0)