@@ -127,12 +127,13 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
127127; GCN-SDAG: ; %bb.0:
128128; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
129129; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
130- ; GCN-SDAG-NEXT: s_clause 0xc
131- ; GCN-SDAG-NEXT: scratch_store_b32 off, v40, s32 offset:48
132- ; GCN-SDAG-NEXT: scratch_store_b32 off, v41, s32 offset:44
133- ; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:40
134- ; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32 offset:36
135- ; GCN-SDAG-NEXT: scratch_store_b32 off, v44, s32 offset:32
130+ ; GCN-SDAG-NEXT: s_clause 0xd
131+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v40, s32 offset:52
132+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v41, s32 offset:48
133+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:44
134+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32 offset:40
135+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v44, s32 offset:36
136+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v45, s32 offset:32
136137; GCN-SDAG-NEXT: scratch_store_b32 off, v56, s32 offset:28
137138; GCN-SDAG-NEXT: scratch_store_b32 off, v57, s32 offset:24
138139; GCN-SDAG-NEXT: scratch_store_b32 off, v58, s32 offset:20
@@ -141,57 +142,50 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
141142; GCN-SDAG-NEXT: scratch_store_b32 off, v61, s32 offset:8
142143; GCN-SDAG-NEXT: scratch_store_b32 off, v62, s32 offset:4
143144; GCN-SDAG-NEXT: scratch_store_b32 off, v63, s32
144- ; GCN-SDAG-NEXT: global_load_b128 v[5:8], v[0:1], off offset:224
145+ ; GCN-SDAG-NEXT: global_load_b128 v[6:9], v[0:1], off offset:224
146+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
145147; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
146- ; GCN-SDAG-NEXT: scratch_store_b128 off, v[5:8 ], s32 offset:68 ; 16-byte Folded Spill
147- ; GCN-SDAG-NEXT: global_load_b128 v[5:8 ], v[0:1], off offset:240
148+ ; GCN-SDAG-NEXT: scratch_store_b128 off, v[6:9 ], s32 offset:56 ; 16-byte Folded Spill
149+ ; GCN-SDAG-NEXT: global_load_b128 v[6:9 ], v[0:1], off offset:240
148150; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
149- ; GCN-SDAG-NEXT: scratch_store_b128 off, v[5:8], s32 offset:84 ; 16-byte Folded Spill
150- ; GCN-SDAG-NEXT: s_clause 0xc
151- ; GCN-SDAG-NEXT: global_load_b128 v[13:16], v[0:1], off offset:192
152- ; GCN-SDAG-NEXT: global_load_b128 v[17:20], v[0:1], off offset:208
153- ; GCN-SDAG-NEXT: global_load_b128 v[21:24], v[0:1], off offset:160
154- ; GCN-SDAG-NEXT: global_load_b128 v[25:28], v[0:1], off offset:176
155- ; GCN-SDAG-NEXT: global_load_b128 v[29:32], v[0:1], off offset:128
156- ; GCN-SDAG-NEXT: global_load_b128 v[33:36], v[0:1], off offset:144
157- ; GCN-SDAG-NEXT: global_load_b128 v[48:51], v[0:1], off offset:96
158- ; GCN-SDAG-NEXT: global_load_b128 v[52:55], v[0:1], off offset:112
159- ; GCN-SDAG-NEXT: global_load_b128 v[37:40], v[0:1], off offset:64
160- ; GCN-SDAG-NEXT: global_load_b128 v[41:44], v[0:1], off offset:80
161- ; GCN-SDAG-NEXT: global_load_b128 v[56:59], v[0:1], off offset:32
162- ; GCN-SDAG-NEXT: global_load_b128 v[60:63], v[0:1], off offset:48
163- ; GCN-SDAG-NEXT: global_load_b128 v[5:8], v[0:1], off
151+ ; GCN-SDAG-NEXT: scratch_store_b128 off, v[6:9], s32 offset:72 ; 16-byte Folded Spill
152+ ; GCN-SDAG-NEXT: s_clause 0xd
153+ ; GCN-SDAG-NEXT: global_load_b128 v[10:13], v[0:1], off offset:192
154+ ; GCN-SDAG-NEXT: global_load_b128 v[14:17], v[0:1], off offset:208
155+ ; GCN-SDAG-NEXT: global_load_b128 v[18:21], v[0:1], off offset:160
156+ ; GCN-SDAG-NEXT: global_load_b128 v[22:25], v[0:1], off offset:176
157+ ; GCN-SDAG-NEXT: global_load_b128 v[26:29], v[0:1], off offset:128
158+ ; GCN-SDAG-NEXT: global_load_b128 v[30:33], v[0:1], off offset:144
159+ ; GCN-SDAG-NEXT: global_load_b128 v[34:37], v[0:1], off offset:96
160+ ; GCN-SDAG-NEXT: global_load_b128 v[48:51], v[0:1], off offset:112
161+ ; GCN-SDAG-NEXT: global_load_b128 v[52:55], v[0:1], off offset:64
162+ ; GCN-SDAG-NEXT: global_load_b128 v[38:41], v[0:1], off offset:80
163+ ; GCN-SDAG-NEXT: global_load_b128 v[42:45], v[0:1], off offset:32
164+ ; GCN-SDAG-NEXT: global_load_b128 v[56:59], v[0:1], off offset:48
165+ ; GCN-SDAG-NEXT: global_load_b128 v[60:63], v[0:1], off
166+ ; GCN-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:16
167+ ; GCN-SDAG-NEXT: scratch_load_b128 v[6:9], off, s32 offset:56 th:TH_LOAD_LU ; 16-byte Folded Reload
164168; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
165- ; GCN-SDAG-NEXT: scratch_store_b128 off, v[5:8], s32 offset:52 ; 16-byte Folded Spill
166- ; GCN-SDAG-NEXT: global_load_b128 v[5:8], v[0:1], off offset:16
167- ; GCN-SDAG-NEXT: scratch_load_b128 v[9:12], off, s32 offset:68 th:TH_LOAD_LU ; 16-byte Folded Reload
168- ; GCN-SDAG-NEXT: s_wait_loadcnt 0x1
169- ; GCN-SDAG-NEXT: s_wait_xcnt 0x1
170- ; GCN-SDAG-NEXT: v_mov_b32_e32 v0, v7
171- ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
172- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[9:12], off offset:224
173- ; GCN-SDAG-NEXT: scratch_load_b128 v[9:12], off, s32 offset:84 th:TH_LOAD_LU ; 16-byte Folded Reload
174- ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
175- ; GCN-SDAG-NEXT: s_clause 0xc
176- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[9:12], off offset:240
177- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[13:16], off offset:192
178- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[17:20], off offset:208
179- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[21:24], off offset:160
180- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[25:28], off offset:176
181- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[29:32], off offset:128
182- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[33:36], off offset:144
183- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[48:51], off offset:96
184- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[52:55], off offset:112
185- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[37:40], off offset:64
186- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[41:44], off offset:80
187- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[56:59], off offset:32
188- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[60:63], off offset:48
189- ; GCN-SDAG-NEXT: scratch_load_b128 v[9:12], off, s32 offset:52 th:TH_LOAD_LU ; 16-byte Folded Reload
169+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[6:9], off offset:224
170+ ; GCN-SDAG-NEXT: scratch_load_b128 v[6:9], off, s32 offset:72 th:TH_LOAD_LU ; 16-byte Folded Reload
190171; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
191- ; GCN-SDAG-NEXT: s_clause 0x1
192- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[9:12], off
193- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[5:8], off offset:16
194- ; GCN-SDAG-NEXT: s_clause 0xc
172+ ; GCN-SDAG-NEXT: s_clause 0xe
173+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[6:9], off offset:240
174+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[10:13], off offset:192
175+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[14:17], off offset:208
176+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[18:21], off offset:160
177+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[22:25], off offset:176
178+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[26:29], off offset:128
179+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[30:33], off offset:144
180+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[34:37], off offset:96
181+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[48:51], off offset:112
182+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[52:55], off offset:64
183+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[38:41], off offset:80
184+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[42:45], off offset:32
185+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[56:59], off offset:48
186+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[60:63], off
187+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[0:3], off offset:16
188+ ; GCN-SDAG-NEXT: s_clause 0xd
195189; GCN-SDAG-NEXT: scratch_load_b32 v63, off, s32
196190; GCN-SDAG-NEXT: scratch_load_b32 v62, off, s32 offset:4
197191; GCN-SDAG-NEXT: scratch_load_b32 v61, off, s32 offset:8
@@ -200,24 +194,30 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
200194; GCN-SDAG-NEXT: scratch_load_b32 v58, off, s32 offset:20
201195; GCN-SDAG-NEXT: scratch_load_b32 v57, off, s32 offset:24
202196; GCN-SDAG-NEXT: scratch_load_b32 v56, off, s32 offset:28
203- ; GCN-SDAG-NEXT: scratch_load_b32 v44, off, s32 offset:32
204- ; GCN-SDAG-NEXT: scratch_load_b32 v43, off, s32 offset:36
205- ; GCN-SDAG-NEXT: scratch_load_b32 v42, off, s32 offset:40
206- ; GCN-SDAG-NEXT: scratch_load_b32 v41, off, s32 offset:44
207- ; GCN-SDAG-NEXT: scratch_load_b32 v40, off, s32 offset:48
197+ ; GCN-SDAG-NEXT: scratch_load_b32 v45, off, s32 offset:32
198+ ; GCN-SDAG-NEXT: scratch_load_b32 v44, off, s32 offset:36
199+ ; GCN-SDAG-NEXT: scratch_load_b32 v43, off, s32 offset:40
200+ ; GCN-SDAG-NEXT: scratch_load_b32 v42, off, s32 offset:44
201+ ; GCN-SDAG-NEXT: scratch_load_b32 v41, off, s32 offset:48
202+ ; GCN-SDAG-NEXT: scratch_load_b32 v40, off, s32 offset:52
203+ ; GCN-SDAG-NEXT: s_wait_xcnt 0xe
204+ ; GCN-SDAG-NEXT: v_mov_b32_e32 v0, v2
208205; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
209206; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31]
210207;
211208; GCN-GISEL-LABEL: test_v64i32_load_store:
212209; GCN-GISEL: ; %bb.0:
213210; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
214211; GCN-GISEL-NEXT: s_wait_kmcnt 0x0
215- ; GCN-GISEL-NEXT: s_clause 0xc
216- ; GCN-GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:48
217- ; GCN-GISEL-NEXT: scratch_store_b32 off, v41, s32 offset:44
218- ; GCN-GISEL-NEXT: scratch_store_b32 off, v42, s32 offset:40
219- ; GCN-GISEL-NEXT: scratch_store_b32 off, v43, s32 offset:36
220- ; GCN-GISEL-NEXT: scratch_store_b32 off, v44, s32 offset:32
212+ ; GCN-GISEL-NEXT: s_clause 0xf
213+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:60
214+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v41, s32 offset:56
215+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v42, s32 offset:52
216+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v43, s32 offset:48
217+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v44, s32 offset:44
218+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v45, s32 offset:40
219+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v46, s32 offset:36
220+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v47, s32 offset:32
221221; GCN-GISEL-NEXT: scratch_store_b32 off, v56, s32 offset:28
222222; GCN-GISEL-NEXT: scratch_store_b32 off, v57, s32 offset:24
223223; GCN-GISEL-NEXT: scratch_store_b32 off, v58, s32 offset:20
@@ -226,56 +226,53 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
226226; GCN-GISEL-NEXT: scratch_store_b32 off, v61, s32 offset:8
227227; GCN-GISEL-NEXT: scratch_store_b32 off, v62, s32 offset:4
228228; GCN-GISEL-NEXT: scratch_store_b32 off, v63, s32
229- ; GCN-GISEL-NEXT: global_load_b128 v[5:8], v[0:1], off offset:32
229+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x8
230+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v46, v3 :: v_dual_mov_b32 v47, v4
231+ ; GCN-GISEL-NEXT: global_load_b128 v[2:5], v[0:1], off offset:32
230232; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
231- ; GCN-GISEL-NEXT: scratch_store_b128 off, v[5:8], s32 offset:52 ; 16-byte Folded Spill
232- ; GCN-GISEL-NEXT: global_load_b128 v[5:8], v[0:1], off offset:48
233+ ; GCN-GISEL-NEXT: scratch_store_b128 off, v[2:5], s32 offset:80 ; 16-byte Folded Spill
234+ ; GCN-GISEL-NEXT: s_clause 0xe
235+ ; GCN-GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off offset:48
236+ ; GCN-GISEL-NEXT: global_load_b128 v[10:13], v[0:1], off offset:64
237+ ; GCN-GISEL-NEXT: global_load_b128 v[14:17], v[0:1], off offset:80
238+ ; GCN-GISEL-NEXT: global_load_b128 v[18:21], v[0:1], off offset:96
239+ ; GCN-GISEL-NEXT: global_load_b128 v[22:25], v[0:1], off offset:112
240+ ; GCN-GISEL-NEXT: global_load_b128 v[26:29], v[0:1], off offset:128
241+ ; GCN-GISEL-NEXT: global_load_b128 v[30:33], v[0:1], off offset:144
242+ ; GCN-GISEL-NEXT: global_load_b128 v[34:37], v[0:1], off offset:160
243+ ; GCN-GISEL-NEXT: global_load_b128 v[48:51], v[0:1], off offset:176
244+ ; GCN-GISEL-NEXT: global_load_b128 v[52:55], v[0:1], off offset:192
245+ ; GCN-GISEL-NEXT: global_load_b128 v[38:41], v[0:1], off offset:208
246+ ; GCN-GISEL-NEXT: global_load_b128 v[42:45], v[0:1], off offset:224
247+ ; GCN-GISEL-NEXT: global_load_b128 v[56:59], v[0:1], off
248+ ; GCN-GISEL-NEXT: global_load_b128 v[60:63], v[0:1], off offset:16
249+ ; GCN-GISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:240
233250; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
234- ; GCN-GISEL-NEXT: scratch_store_b128 off, v[5:8], s32 offset:68 ; 16-byte Folded Spill
235- ; GCN-GISEL-NEXT: s_clause 0xd
236- ; GCN-GISEL-NEXT: global_load_b128 v[13:16], v[0:1], off offset:64
237- ; GCN-GISEL-NEXT: global_load_b128 v[17:20], v[0:1], off offset:80
238- ; GCN-GISEL-NEXT: global_load_b128 v[21:24], v[0:1], off offset:96
239- ; GCN-GISEL-NEXT: global_load_b128 v[25:28], v[0:1], off offset:112
240- ; GCN-GISEL-NEXT: global_load_b128 v[29:32], v[0:1], off offset:128
241- ; GCN-GISEL-NEXT: global_load_b128 v[33:36], v[0:1], off offset:144
242- ; GCN-GISEL-NEXT: global_load_b128 v[48:51], v[0:1], off offset:160
243- ; GCN-GISEL-NEXT: global_load_b128 v[52:55], v[0:1], off offset:176
244- ; GCN-GISEL-NEXT: global_load_b128 v[37:40], v[0:1], off offset:192
245- ; GCN-GISEL-NEXT: global_load_b128 v[41:44], v[0:1], off offset:208
246- ; GCN-GISEL-NEXT: global_load_b128 v[56:59], v[0:1], off offset:224
247- ; GCN-GISEL-NEXT: global_load_b128 v[60:63], v[0:1], off
248- ; GCN-GISEL-NEXT: global_load_b128 v[5:8], v[0:1], off offset:16
249- ; GCN-GISEL-NEXT: global_load_b128 v[9:12], v[0:1], off offset:240
250- ; GCN-GISEL-NEXT: s_wait_loadcnt 0x1
251- ; GCN-GISEL-NEXT: s_wait_xcnt 0x0
252- ; GCN-GISEL-NEXT: v_mov_b32_e32 v0, v7
253- ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
254- ; GCN-GISEL-NEXT: scratch_store_b128 off, v[9:12], s32 offset:84 ; 16-byte Folded Spill
255- ; GCN-GISEL-NEXT: scratch_load_b128 v[9:12], off, s32 offset:52 th:TH_LOAD_LU ; 16-byte Folded Reload
251+ ; GCN-GISEL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:64 ; 16-byte Folded Spill
252+ ; GCN-GISEL-NEXT: scratch_load_b128 v[0:3], off, s32 offset:80 th:TH_LOAD_LU ; 16-byte Folded Reload
256253; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
257- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[9:12], off offset:32
258- ; GCN-GISEL-NEXT: scratch_load_b128 v[9:12], off, s32 offset:68 th:TH_LOAD_LU ; 16-byte Folded Reload
254+ ; GCN-GISEL-NEXT: s_clause 0xe
255+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[0:3], off offset:32
256+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[6:9], off offset:48
257+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[10:13], off offset:64
258+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[14:17], off offset:80
259+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[18:21], off offset:96
260+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[22:25], off offset:112
261+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[26:29], off offset:128
262+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[30:33], off offset:144
263+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[34:37], off offset:160
264+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[48:51], off offset:176
265+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[52:55], off offset:192
266+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[38:41], off offset:208
267+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[42:45], off offset:224
268+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[56:59], off
269+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[60:63], off offset:16
270+ ; GCN-GISEL-NEXT: scratch_load_b128 v[0:3], off, s32 offset:64 th:TH_LOAD_LU ; 16-byte Folded Reload
259271; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
260- ; GCN-GISEL-NEXT: s_clause 0xd
261- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[9:12], off offset:48
262- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[13:16], off offset:64
263- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[17:20], off offset:80
264- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[21:24], off offset:96
265- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[25:28], off offset:112
266- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[29:32], off offset:128
267- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[33:36], off offset:144
268- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[48:51], off offset:160
269- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[52:55], off offset:176
270- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[37:40], off offset:192
271- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[41:44], off offset:208
272- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[56:59], off offset:224
273- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[60:63], off
274- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[5:8], off offset:16
275- ; GCN-GISEL-NEXT: scratch_load_b128 v[8:11], off, s32 offset:84 th:TH_LOAD_LU ; 16-byte Folded Reload
276- ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
277- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[8:11], off offset:240
278- ; GCN-GISEL-NEXT: s_clause 0xc
272+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[0:3], off offset:240
273+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x0
274+ ; GCN-GISEL-NEXT: v_mov_b32_e32 v0, v62
275+ ; GCN-GISEL-NEXT: s_clause 0xf
279276; GCN-GISEL-NEXT: scratch_load_b32 v63, off, s32
280277; GCN-GISEL-NEXT: scratch_load_b32 v62, off, s32 offset:4
281278; GCN-GISEL-NEXT: scratch_load_b32 v61, off, s32 offset:8
@@ -284,11 +281,14 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
284281; GCN-GISEL-NEXT: scratch_load_b32 v58, off, s32 offset:20
285282; GCN-GISEL-NEXT: scratch_load_b32 v57, off, s32 offset:24
286283; GCN-GISEL-NEXT: scratch_load_b32 v56, off, s32 offset:28
287- ; GCN-GISEL-NEXT: scratch_load_b32 v44, off, s32 offset:32
288- ; GCN-GISEL-NEXT: scratch_load_b32 v43, off, s32 offset:36
289- ; GCN-GISEL-NEXT: scratch_load_b32 v42, off, s32 offset:40
290- ; GCN-GISEL-NEXT: scratch_load_b32 v41, off, s32 offset:44
291- ; GCN-GISEL-NEXT: scratch_load_b32 v40, off, s32 offset:48
284+ ; GCN-GISEL-NEXT: scratch_load_b32 v47, off, s32 offset:32
285+ ; GCN-GISEL-NEXT: scratch_load_b32 v46, off, s32 offset:36
286+ ; GCN-GISEL-NEXT: scratch_load_b32 v45, off, s32 offset:40
287+ ; GCN-GISEL-NEXT: scratch_load_b32 v44, off, s32 offset:44
288+ ; GCN-GISEL-NEXT: scratch_load_b32 v43, off, s32 offset:48
289+ ; GCN-GISEL-NEXT: scratch_load_b32 v42, off, s32 offset:52
290+ ; GCN-GISEL-NEXT: scratch_load_b32 v41, off, s32 offset:56
291+ ; GCN-GISEL-NEXT: scratch_load_b32 v40, off, s32 offset:60
292292; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
293293; GCN-GISEL-NEXT: s_set_pc_i64 s[30:31]
294294 %vec = load <64 x i32 >, ptr addrspace (1 ) %ptr
0 commit comments