@@ -154,61 +154,61 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr
154154; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
155155; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0
156156; GCN-NEXT: s_nop 0
157+ ; GCN-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:16
157158; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
158159; GCN-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:8
159- ; GCN-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:12
160- ; GCN-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:16
161160; GCN-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:20
162161; GCN-NEXT: buffer_load_dword v6, off, s[0:3], 0 offset:24
163162; GCN-NEXT: buffer_load_dword v7, off, s[0:3], 0 offset:28
163+ ; GCN-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:12
164164; GCN-NEXT: buffer_load_dword v8, off, s[0:3], 0 offset:32
165+ ; GCN-NEXT: buffer_load_dword v12, off, s[0:3], 0 offset:48
165166; GCN-NEXT: buffer_load_dword v9, off, s[0:3], 0 offset:36
166167; GCN-NEXT: buffer_load_dword v10, off, s[0:3], 0 offset:40
167- ; GCN-NEXT: buffer_load_dword v11, off, s[0:3], 0 offset:44
168- ; GCN-NEXT: buffer_load_dword v12, off, s[0:3], 0 offset:48
169168; GCN-NEXT: buffer_load_dword v13, off, s[0:3], 0 offset:52
170169; GCN-NEXT: buffer_load_dword v14, off, s[0:3], 0 offset:56
171170; GCN-NEXT: buffer_load_dword v15, off, s[0:3], 0 offset:60
171+ ; GCN-NEXT: buffer_load_dword v11, off, s[0:3], 0 offset:44
172172; GCN-NEXT: buffer_load_dword v16, off, s[0:3], 0 offset:64
173+ ; GCN-NEXT: buffer_load_dword v20, off, s[0:3], 0 offset:80
173174; GCN-NEXT: buffer_load_dword v17, off, s[0:3], 0 offset:68
174175; GCN-NEXT: buffer_load_dword v18, off, s[0:3], 0 offset:72
175- ; GCN-NEXT: buffer_load_dword v19, off, s[0:3], 0 offset:76
176- ; GCN-NEXT: buffer_load_dword v20, off, s[0:3], 0 offset:80
177176; GCN-NEXT: buffer_load_dword v21, off, s[0:3], 0 offset:84
178177; GCN-NEXT: buffer_load_dword v22, off, s[0:3], 0 offset:88
179178; GCN-NEXT: buffer_load_dword v23, off, s[0:3], 0 offset:92
179+ ; GCN-NEXT: buffer_load_dword v19, off, s[0:3], 0 offset:76
180180; GCN-NEXT: buffer_load_dword v24, off, s[0:3], 0 offset:96
181+ ; GCN-NEXT: buffer_load_dword v28, off, s[0:3], 0 offset:112
181182; GCN-NEXT: buffer_load_dword v25, off, s[0:3], 0 offset:100
182183; GCN-NEXT: buffer_load_dword v26, off, s[0:3], 0 offset:104
183- ; GCN-NEXT: buffer_load_dword v27, off, s[0:3], 0 offset:108
184- ; GCN-NEXT: buffer_load_dword v28, off, s[0:3], 0 offset:112
185184; GCN-NEXT: buffer_load_dword v29, off, s[0:3], 0 offset:116
186185; GCN-NEXT: buffer_load_dword v30, off, s[0:3], 0 offset:120
187186; GCN-NEXT: buffer_load_dword v31, off, s[0:3], 0 offset:124
187+ ; GCN-NEXT: buffer_load_dword v27, off, s[0:3], 0 offset:108
188188; GCN-NEXT: buffer_load_dword v32, off, s[0:3], 0 offset:128
189+ ; GCN-NEXT: buffer_load_dword v36, off, s[0:3], 0 offset:144
189190; GCN-NEXT: buffer_load_dword v33, off, s[0:3], 0 offset:132
190191; GCN-NEXT: buffer_load_dword v34, off, s[0:3], 0 offset:136
191- ; GCN-NEXT: buffer_load_dword v35, off, s[0:3], 0 offset:140
192- ; GCN-NEXT: buffer_load_dword v36, off, s[0:3], 0 offset:144
193192; GCN-NEXT: buffer_load_dword v37, off, s[0:3], 0 offset:148
194193; GCN-NEXT: buffer_load_dword v38, off, s[0:3], 0 offset:152
195194; GCN-NEXT: buffer_load_dword v39, off, s[0:3], 0 offset:156
195+ ; GCN-NEXT: buffer_load_dword v35, off, s[0:3], 0 offset:140
196196; GCN-NEXT: buffer_load_dword v40, off, s[0:3], 0 offset:160
197+ ; GCN-NEXT: buffer_load_dword v44, off, s[0:3], 0 offset:176
197198; GCN-NEXT: buffer_load_dword v41, off, s[0:3], 0 offset:164
198199; GCN-NEXT: buffer_load_dword v42, off, s[0:3], 0 offset:168
199- ; GCN-NEXT: buffer_load_dword v43, off, s[0:3], 0 offset:172
200- ; GCN-NEXT: buffer_load_dword v44, off, s[0:3], 0 offset:176
201200; GCN-NEXT: buffer_load_dword v45, off, s[0:3], 0 offset:180
202201; GCN-NEXT: buffer_load_dword v46, off, s[0:3], 0 offset:184
203202; GCN-NEXT: buffer_load_dword v47, off, s[0:3], 0 offset:188
203+ ; GCN-NEXT: buffer_load_dword v43, off, s[0:3], 0 offset:172
204204; GCN-NEXT: buffer_load_dword v48, off, s[0:3], 0 offset:192
205+ ; GCN-NEXT: buffer_load_dword v52, off, s[0:3], 0 offset:208
205206; GCN-NEXT: buffer_load_dword v49, off, s[0:3], 0 offset:196
206207; GCN-NEXT: buffer_load_dword v50, off, s[0:3], 0 offset:200
207- ; GCN-NEXT: buffer_load_dword v51, off, s[0:3], 0 offset:204
208- ; GCN-NEXT: buffer_load_dword v52, off, s[0:3], 0 offset:208
209208; GCN-NEXT: buffer_load_dword v53, off, s[0:3], 0 offset:212
210209; GCN-NEXT: buffer_load_dword v54, off, s[0:3], 0 offset:216
211210; GCN-NEXT: buffer_load_dword v55, off, s[0:3], 0 offset:220
211+ ; GCN-NEXT: buffer_load_dword v51, off, s[0:3], 0 offset:204
212212; GCN-NEXT: buffer_load_dword v56, off, s[0:3], 0 offset:224
213213; GCN-NEXT: buffer_load_dword v57, off, s[0:3], 0 offset:228
214214; GCN-NEXT: buffer_load_dword v58, off, s[0:3], 0 offset:232
@@ -217,33 +217,26 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr
217217; GCN-NEXT: buffer_load_dword v61, off, s[0:3], 0 offset:244
218218; GCN-NEXT: buffer_load_dword v62, off, s[0:3], 0 offset:248
219219; GCN-NEXT: buffer_load_dword v63, off, s[0:3], 0 offset:252
220- ; GCN-NEXT: s_waitcnt vmcnt(60 )
220+ ; GCN-NEXT: s_waitcnt vmcnt(56 )
221221; GCN-NEXT: global_store_dwordx4 v64, v[0:3], s[20:21]
222- ; GCN-NEXT: s_waitcnt vmcnt(57)
223222; GCN-NEXT: global_store_dwordx4 v64, v[4:7], s[20:21] offset:16
224- ; GCN-NEXT: s_waitcnt vmcnt(54 )
223+ ; GCN-NEXT: s_waitcnt vmcnt(50 )
225224; GCN-NEXT: global_store_dwordx4 v64, v[8:11], s[20:21] offset:32
226- ; GCN-NEXT: s_waitcnt vmcnt(51)
227225; GCN-NEXT: global_store_dwordx4 v64, v[12:15], s[20:21] offset:48
228- ; GCN-NEXT: s_waitcnt vmcnt(48 )
226+ ; GCN-NEXT: s_waitcnt vmcnt(44 )
229227; GCN-NEXT: global_store_dwordx4 v64, v[16:19], s[20:21] offset:64
230- ; GCN-NEXT: s_waitcnt vmcnt(45)
231228; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[20:21] offset:80
232- ; GCN-NEXT: s_waitcnt vmcnt(42 )
229+ ; GCN-NEXT: s_waitcnt vmcnt(38 )
233230; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[20:21] offset:96
234- ; GCN-NEXT: s_waitcnt vmcnt(39)
235231; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[20:21] offset:112
236- ; GCN-NEXT: s_waitcnt vmcnt(36 )
232+ ; GCN-NEXT: s_waitcnt vmcnt(32 )
237233; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[20:21] offset:128
238- ; GCN-NEXT: s_waitcnt vmcnt(33)
239234; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[20:21] offset:144
240- ; GCN-NEXT: s_waitcnt vmcnt(30 )
235+ ; GCN-NEXT: s_waitcnt vmcnt(26 )
241236; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[20:21] offset:160
242- ; GCN-NEXT: s_waitcnt vmcnt(27)
243237; GCN-NEXT: global_store_dwordx4 v64, v[44:47], s[20:21] offset:176
244- ; GCN-NEXT: s_waitcnt vmcnt(24 )
238+ ; GCN-NEXT: s_waitcnt vmcnt(20 )
245239; GCN-NEXT: global_store_dwordx4 v64, v[48:51], s[20:21] offset:192
246- ; GCN-NEXT: s_waitcnt vmcnt(21)
247240; GCN-NEXT: global_store_dwordx4 v64, v[52:55], s[20:21] offset:208
248241; GCN-NEXT: s_waitcnt vmcnt(18)
249242; GCN-NEXT: global_store_dwordx4 v64, v[56:59], s[20:21] offset:224
0 commit comments