|
2 | 2 | ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=CI %s |
3 | 3 | ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX9 %s |
4 | 4 | ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s |
5 | | -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s |
| 5 | +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-TRUE16 %s |
| 6 | +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-FAKE16 %s |
6 | 7 |
|
7 | 8 | declare i32 @llvm.amdgcn.workitem.id.x() #0 |
8 | 9 |
|
@@ -227,14 +228,22 @@ define amdgpu_kernel void @add_x_shl_max_offset() #1 { |
227 | 228 | ; GFX10-NEXT: ds_write_b8 v0, v1 offset:65535 |
228 | 229 | ; GFX10-NEXT: s_endpgm |
229 | 230 | ; |
230 | | -; GFX11-LABEL: add_x_shl_max_offset: |
231 | | -; GFX11: ; %bb.0: |
232 | | -; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
233 | | -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
234 | | -; GFX11-NEXT: v_lshlrev_b32_e32 v1, 4, v0 |
235 | | -; GFX11-NEXT: v_mov_b16_e32 v0.l, 13 |
236 | | -; GFX11-NEXT: ds_store_b8 v1, v0 offset:65535 |
237 | | -; GFX11-NEXT: s_endpgm |
| 231 | +; GFX11-TRUE16-LABEL: add_x_shl_max_offset: |
| 232 | +; GFX11-TRUE16: ; %bb.0: |
| 233 | +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 234 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 235 | +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 4, v0 |
| 236 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13 |
| 237 | +; GFX11-TRUE16-NEXT: ds_store_b8 v1, v0 offset:65535 |
| 238 | +; GFX11-TRUE16-NEXT: s_endpgm |
| 239 | +; |
| 240 | +; GFX11-FAKE16-LABEL: add_x_shl_max_offset: |
| 241 | +; GFX11-FAKE16: ; %bb.0: |
| 242 | +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0 |
| 243 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 244 | +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 4, v0 |
| 245 | +; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1 offset:65535 |
| 246 | +; GFX11-FAKE16-NEXT: s_endpgm |
238 | 247 | %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() |
239 | 248 | %shl = shl i32 %x.i, 4 |
240 | 249 | %add = add i32 %shl, 65535 |
@@ -272,15 +281,24 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_alt() #1 { |
272 | 281 | ; GFX10-NEXT: ds_write_b8 v0, v1 |
273 | 282 | ; GFX10-NEXT: s_endpgm |
274 | 283 | ; |
275 | | -; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_alt: |
276 | | -; GFX11: ; %bb.0: |
277 | | -; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
278 | | -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
279 | | -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
280 | | -; GFX11-NEXT: v_xor_b32_e32 v1, 0xffff, v0 |
281 | | -; GFX11-NEXT: v_mov_b16_e32 v0.l, 13 |
282 | | -; GFX11-NEXT: ds_store_b8 v1, v0 |
283 | | -; GFX11-NEXT: s_endpgm |
| 284 | +; GFX11-TRUE16-LABEL: add_x_shl_neg_to_sub_max_offset_alt: |
| 285 | +; GFX11-TRUE16: ; %bb.0: |
| 286 | +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 287 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 288 | +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 289 | +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0xffff, v0 |
| 290 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13 |
| 291 | +; GFX11-TRUE16-NEXT: ds_store_b8 v1, v0 |
| 292 | +; GFX11-TRUE16-NEXT: s_endpgm |
| 293 | +; |
| 294 | +; GFX11-FAKE16-LABEL: add_x_shl_neg_to_sub_max_offset_alt: |
| 295 | +; GFX11-FAKE16: ; %bb.0: |
| 296 | +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0 |
| 297 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 298 | +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 299 | +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v0, 0xffff, v0 |
| 300 | +; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1 |
| 301 | +; GFX11-FAKE16-NEXT: s_endpgm |
284 | 302 | %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() |
285 | 303 | %.neg = mul i32 %x.i, -4 |
286 | 304 | %add = add i32 %.neg, 65535 |
@@ -318,15 +336,24 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_not_canonical() #1 { |
318 | 336 | ; GFX10-NEXT: ds_write_b8 v0, v1 |
319 | 337 | ; GFX10-NEXT: s_endpgm |
320 | 338 | ; |
321 | | -; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical: |
322 | | -; GFX11: ; %bb.0: |
323 | | -; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
324 | | -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
325 | | -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
326 | | -; GFX11-NEXT: v_xor_b32_e32 v1, 0xffff, v0 |
327 | | -; GFX11-NEXT: v_mov_b16_e32 v0.l, 13 |
328 | | -; GFX11-NEXT: ds_store_b8 v1, v0 |
329 | | -; GFX11-NEXT: s_endpgm |
| 339 | +; GFX11-TRUE16-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical: |
| 340 | +; GFX11-TRUE16: ; %bb.0: |
| 341 | +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 342 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 343 | +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 344 | +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0xffff, v0 |
| 345 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13 |
| 346 | +; GFX11-TRUE16-NEXT: ds_store_b8 v1, v0 |
| 347 | +; GFX11-TRUE16-NEXT: s_endpgm |
| 348 | +; |
| 349 | +; GFX11-FAKE16-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical: |
| 350 | +; GFX11-FAKE16: ; %bb.0: |
| 351 | +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0 |
| 352 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 353 | +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 354 | +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v0, 0xffff, v0 |
| 355 | +; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1 |
| 356 | +; GFX11-FAKE16-NEXT: s_endpgm |
330 | 357 | %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0 |
331 | 358 | %neg = sub i32 0, %x.i |
332 | 359 | %shl = shl i32 %neg, 2 |
@@ -362,15 +389,24 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 { |
362 | 389 | ; GFX10-NEXT: ds_write_b8 v0, v1 |
363 | 390 | ; GFX10-NEXT: s_endpgm |
364 | 391 | ; |
365 | | -; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_p1: |
366 | | -; GFX11: ; %bb.0: |
367 | | -; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
368 | | -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
369 | | -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
370 | | -; GFX11-NEXT: v_sub_nc_u32_e32 v1, 0x10000, v0 |
371 | | -; GFX11-NEXT: v_mov_b16_e32 v0.l, 13 |
372 | | -; GFX11-NEXT: ds_store_b8 v1, v0 |
373 | | -; GFX11-NEXT: s_endpgm |
| 392 | +; GFX11-TRUE16-LABEL: add_x_shl_neg_to_sub_max_offset_p1: |
| 393 | +; GFX11-TRUE16: ; %bb.0: |
| 394 | +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 395 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 396 | +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 397 | +; GFX11-TRUE16-NEXT: v_sub_nc_u32_e32 v1, 0x10000, v0 |
| 398 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13 |
| 399 | +; GFX11-TRUE16-NEXT: ds_store_b8 v1, v0 |
| 400 | +; GFX11-TRUE16-NEXT: s_endpgm |
| 401 | +; |
| 402 | +; GFX11-FAKE16-LABEL: add_x_shl_neg_to_sub_max_offset_p1: |
| 403 | +; GFX11-FAKE16: ; %bb.0: |
| 404 | +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0 |
| 405 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 406 | +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 407 | +; GFX11-FAKE16-NEXT: v_sub_nc_u32_e32 v0, 0x10000, v0 |
| 408 | +; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1 |
| 409 | +; GFX11-FAKE16-NEXT: s_endpgm |
374 | 410 | %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0 |
375 | 411 | %neg = sub i32 0, %x.i |
376 | 412 | %shl = shl i32 %neg, 2 |
|
0 commit comments