@@ -117,3 +117,173 @@ body: |
117117 S_ENDPGM 0
118118
119119 ...
120+
121+ # One buffer load that stores into addrspace(3) is outstanding, so the soft waitcnt must survive as a real S_WAITCNT ahead of the barrier.
122+ # GCN-LABEL: name: buffer_load_dword_lds_ds_read_soft_wait
123+ # GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
124+ # GCN-NEXT: S_WAITCNT 3952
125+ # 3952 encodes vmcnt(0)
126+ # GCN-NEXT: S_BARRIER
127+ ---
128+ name : buffer_load_dword_lds_ds_read_soft_wait
129+ body : |
130+ bb.0:
131+ $m0 = S_MOV_B32 0
132+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
133+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3952
134+ S_BARRIER
135+ $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
136+ S_ENDPGM 0
137+
138+ ...
139+
140+ # BUFFER_STORE_LDS_DWORD reads addrspace(3) and writes addrspace(1), i.e. it is not a load into LDS; the soft waitcnt is expected to be removed so the barrier directly follows the store.
141+ # GCN-LABEL: name: buffer_store_lds_dword_ds_read_soft_wait
142+ # GCN: BUFFER_STORE_LDS_DWORD
143+ # GCN-NEXT: S_BARRIER
144+ ---
145+ name : buffer_store_lds_dword_ds_read_soft_wait
146+ body : |
147+ bb.0:
148+ $m0 = S_MOV_B32 0
149+ BUFFER_STORE_LDS_DWORD $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(3) poison` + 4), (store (s32) into `ptr addrspace(1) poison` + 4)
150+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3952
151+ S_BARRIER
152+ $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
153+ S_ENDPGM 0
154+
155+ ...
156+
157+ # Three loads into addrspace(3) are issued: the soft vmcnt(1) is kept before the barrier, and a vmcnt(0) that is absent from the input is expected before the DS read.
158+ # GCN-LABEL: name: series_of_buffer_load_dword_lds_ds_read_soft_wait
159+ # GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
160+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
161+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
162+ # GCN-NEXT: S_WAITCNT 3953
163+ # 3953 encodes vmcnt(1)
164+ # GCN-NEXT: S_BARRIER
165+ # GCN-NEXT: S_WAITCNT 3952
166+ # 3952 encodes vmcnt(0)
167+ # GCN-NEXT: DS_READ_B32_gfx9
168+ ---
169+ name : series_of_buffer_load_dword_lds_ds_read_soft_wait
170+ body : |
171+ bb.0:
172+ $m0 = S_MOV_B32 0
173+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison`), (store (s32) into `ptr addrspace(3) poison`)
174+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
175+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 8), (store (s32) into `ptr addrspace(3) poison` + 8)
176+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3953
177+ S_BARRIER
178+ $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
179+ S_ENDPGM 0
180+
181+ ...
182+
183+ # Only one load is outstanding, so the soft vmcnt(1) (3953) has nothing to wait for and is dropped before the barrier; vmcnt(0) is still expected before the DS read.
184+ # GCN-LABEL: name: buffer_load_dword_lds_ds_read_soft_wait_redundant
185+ # GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
186+ # GCN-NEXT: S_BARRIER
187+ # GCN-NEXT: S_WAITCNT 3952
188+ # 3952 encodes vmcnt(0)
189+ # GCN-NEXT: DS_READ_B32_gfx9
190+ ---
191+ name : buffer_load_dword_lds_ds_read_soft_wait_redundant
192+ body : |
193+ bb.0:
194+ $m0 = S_MOV_B32 0
195+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
196+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3953
197+ S_BARRIER
198+ $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
199+ S_ENDPGM 0
200+
201+ ...
202+
203+ # Back-to-back identical soft waitcnts collapse into one: a single vmcnt(1) before the barrier, and a single vmcnt(0) before the DS read in place of the second soft vmcnt(1) pair.
204+ # GCN-LABEL: name: series_of_buffer_load_dword_lds_ds_read_soft_wait_repeat
205+ # GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
206+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
207+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
208+ # GCN-NEXT: S_WAITCNT 3953
209+ # 3953 encodes vmcnt(1)
210+ # GCN-NEXT: S_BARRIER
211+ # GCN-NEXT: S_WAITCNT 3952
212+ # 3952 encodes vmcnt(0)
213+ # GCN-NEXT: DS_READ_B32_gfx9
214+ ---
215+ name : series_of_buffer_load_dword_lds_ds_read_soft_wait_repeat
216+ body : |
217+ bb.0:
218+ $m0 = S_MOV_B32 0
219+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison`), (store (s32) into `ptr addrspace(3) poison`)
220+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
221+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 8), (store (s32) into `ptr addrspace(3) poison` + 8)
222+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3953
223+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3953
224+ S_BARRIER
225+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3953
226+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3953
227+ $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
228+ S_ENDPGM 0
229+
230+ ...
231+
232+ # Adjacent soft waitcnts merge into one wait per side of the barrier: 3954 followed by 3953 yields the lower count 3953 before it, and the two 3952s yield a single 3952 after it.
233+ # GCN-LABEL: name: series_of_buffer_load_dword_lds_ds_read_soft_wait_merge
234+ # GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
235+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
236+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
237+ # GCN-NEXT: S_WAITCNT 3953
238+ # 3953 encodes vmcnt(1)
239+ # GCN-NEXT: S_BARRIER
240+ # GCN-NEXT: S_WAITCNT 3952
241+ # 3952 encodes vmcnt(0)
242+ # GCN-NEXT: DS_READ_B32_gfx9
243+ ---
244+ name : series_of_buffer_load_dword_lds_ds_read_soft_wait_merge
245+ body : |
246+ bb.0:
247+ $m0 = S_MOV_B32 0
248+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison`), (store (s32) into `ptr addrspace(3) poison`)
249+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
250+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 8), (store (s32) into `ptr addrspace(3) poison` + 8)
251+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3954
252+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3953
253+ S_BARRIER
254+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3952
255+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3952
256+ $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
257+ S_ENDPGM 0
258+
259+ ...
260+
261+
262+ # A non-soft S_WAITCNT 0 already sits between the second and third load and is kept as is; presumably only the third load is outstanding after it, so no vmcnt(1) is expected before the barrier, only vmcnt(0) before the DS read.
263+ # GCN-LABEL: name: series_of_buffer_load_dword_lds_ds_read_soft_wait_preexisting
264+ # GCN: BUFFER_LOAD_DWORD_LDS_IDXEN
265+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
266+ # GCN-NEXT: S_WAITCNT 0
267+ # GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN
268+ # GCN-NEXT: S_BARRIER
269+ # GCN-NEXT: S_WAITCNT 3952
270+ # 3952 encodes vmcnt(0)
271+ # GCN-NEXT: DS_READ_B32_gfx9
272+ ---
273+ name : series_of_buffer_load_dword_lds_ds_read_soft_wait_preexisting
274+ body : |
275+ bb.0:
276+ $m0 = S_MOV_B32 0
277+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison`), (store (s32) into `ptr addrspace(3) poison`)
278+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
279+ S_WAITCNT 0
280+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 8), (store (s32) into `ptr addrspace(3) poison` + 8)
281+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3953
282+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3953
283+ S_BARRIER
284+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3953
285+ S_WAITCNT_DIRECT_LDS_LOAD_soft 3953
286+ $vgpr0 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `ptr addrspace(3) poison`)
287+ S_ENDPGM 0
288+
289+ ...
0 commit comments