|
3 | 3 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s |
4 | 4 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX10,GFX10_DEFAULT %s |
5 | 5 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GFX10,FLATSCR_GFX10 %s |
6 | | -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX11 %s |
7 | | -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GFX11 %s |
| 6 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode,+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s |
| 7 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode,-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s |
| 8 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch,+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s |
| 9 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch,-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s |
8 | 10 |
|
9 | 11 | define <2 x half> @chain_hi_to_lo_private() { |
10 | 12 | ; GFX900-LABEL: chain_hi_to_lo_private: |
@@ -156,14 +158,23 @@ define <2 x half> @chain_hi_to_lo_arithmatic(ptr addrspace(5) %base, half %in) { |
156 | 158 | ; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v0, v1 |
157 | 159 | ; FLATSCR_GFX10-NEXT: s_setpc_b64 s[30:31] |
158 | 160 | ; |
159 | | -; GFX11-LABEL: chain_hi_to_lo_arithmatic: |
160 | | -; GFX11: ; %bb.0: ; %bb |
161 | | -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
162 | | -; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1 |
163 | | -; GFX11-NEXT: scratch_load_d16_hi_b16 v1, v0, off |
164 | | -; GFX11-NEXT: s_waitcnt vmcnt(0) |
165 | | -; GFX11-NEXT: v_mov_b32_e32 v0, v1 |
166 | | -; GFX11-NEXT: s_setpc_b64 s[30:31] |
| 161 | +; GFX11-TRUE16-LABEL: chain_hi_to_lo_arithmatic: |
| 162 | +; GFX11-TRUE16: ; %bb.0: ; %bb |
| 163 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 164 | +; GFX11-TRUE16-NEXT: v_add_f16_e32 v1.l, 1.0, v1.l |
| 165 | +; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v1, v0, off |
| 166 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 167 | +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v1 |
| 168 | +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| 169 | +; |
| 170 | +; GFX11-FAKE16-LABEL: chain_hi_to_lo_arithmatic: |
| 171 | +; GFX11-FAKE16: ; %bb.0: ; %bb |
| 172 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 173 | +; GFX11-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 |
| 174 | +; GFX11-FAKE16-NEXT: scratch_load_d16_hi_b16 v1, v0, off |
| 175 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 176 | +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, v1 |
| 177 | +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
167 | 178 | bb: |
168 | 179 | %arith_lo = fadd half %in, 1.0 |
169 | 180 | %load_hi = load half, ptr addrspace(5) %base |
@@ -361,18 +372,31 @@ define <2 x half> @chain_hi_to_lo_flat() { |
361 | 372 | ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
362 | 373 | ; GFX10-NEXT: s_setpc_b64 s[30:31] |
363 | 374 | ; |
364 | | -; GFX11-LABEL: chain_hi_to_lo_flat: |
365 | | -; GFX11: ; %bb.0: ; %bb |
366 | | -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
367 | | -; GFX11-NEXT: v_mov_b32_e32 v0, 2 |
368 | | -; GFX11-NEXT: v_mov_b32_e32 v1, 0 |
369 | | -; GFX11-NEXT: flat_load_u16 v0, v[0:1] |
370 | | -; GFX11-NEXT: v_mov_b32_e32 v1, 0 |
371 | | -; GFX11-NEXT: v_mov_b32_e32 v2, 0 |
372 | | -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
373 | | -; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[1:2] |
374 | | -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
375 | | -; GFX11-NEXT: s_setpc_b64 s[30:31] |
| 375 | +; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat: |
| 376 | +; GFX11-TRUE16: ; %bb.0: ; %bb |
| 377 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 378 | +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 2 |
| 379 | +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 |
| 380 | +; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] |
| 381 | +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 |
| 382 | +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0 |
| 383 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 384 | +; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[1:2] |
| 385 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 386 | +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| 387 | +; |
| 388 | +; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat: |
| 389 | +; GFX11-FAKE16: ; %bb.0: ; %bb |
| 390 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 391 | +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 2 |
| 392 | +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
| 393 | +; GFX11-FAKE16-NEXT: flat_load_u16 v0, v[0:1] |
| 394 | +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
| 395 | +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0 |
| 396 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 397 | +; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[1:2] |
| 398 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 399 | +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
376 | 400 | bb: |
377 | 401 | %gep_lo = getelementptr inbounds half, ptr null, i64 1 |
378 | 402 | %load_lo = load half, ptr %gep_lo |
@@ -403,14 +427,23 @@ define <2 x half> @chain_hi_to_lo_flat_different_bases(ptr %base_lo, ptr %base_h |
403 | 427 | ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
404 | 428 | ; GFX10-NEXT: s_setpc_b64 s[30:31] |
405 | 429 | ; |
406 | | -; GFX11-LABEL: chain_hi_to_lo_flat_different_bases: |
407 | | -; GFX11: ; %bb.0: ; %bb |
408 | | -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
409 | | -; GFX11-NEXT: flat_load_u16 v0, v[0:1] |
410 | | -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
411 | | -; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[2:3] |
412 | | -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
413 | | -; GFX11-NEXT: s_setpc_b64 s[30:31] |
| 430 | +; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat_different_bases: |
| 431 | +; GFX11-TRUE16: ; %bb.0: ; %bb |
| 432 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 433 | +; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] |
| 434 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 435 | +; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[2:3] |
| 436 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 437 | +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| 438 | +; |
| 439 | +; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat_different_bases: |
| 440 | +; GFX11-FAKE16: ; %bb.0: ; %bb |
| 441 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 442 | +; GFX11-FAKE16-NEXT: flat_load_u16 v0, v[0:1] |
| 443 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 444 | +; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[2:3] |
| 445 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 446 | +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
414 | 447 | bb: |
415 | 448 | %load_lo = load half, ptr %base_lo |
416 | 449 | %load_hi = load half, ptr %base_hi |
@@ -864,17 +897,31 @@ define <2 x i16> @chain_hi_to_lo_flat_other_dep(ptr addrspace(0) %ptr) { |
864 | 897 | ; GFX10-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 |
865 | 898 | ; GFX10-NEXT: s_setpc_b64 s[30:31] |
866 | 899 | ; |
867 | | -; GFX11-LABEL: chain_hi_to_lo_flat_other_dep: |
868 | | -; GFX11: ; %bb.0: ; %bb |
869 | | -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
870 | | -; GFX11-NEXT: flat_load_u16 v2, v[0:1] offset:2 glc dlc |
871 | | -; GFX11-NEXT: s_waitcnt vmcnt(0) |
872 | | -; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc |
873 | | -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
874 | | -; GFX11-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0] |
875 | | -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
876 | | -; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 |
877 | | -; GFX11-NEXT: s_setpc_b64 s[30:31] |
| 900 | +; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat_other_dep: |
| 901 | +; GFX11-TRUE16: ; %bb.0: ; %bb |
| 902 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 903 | +; GFX11-TRUE16-NEXT: flat_load_d16_b16 v2, v[0:1] offset:2 glc dlc |
| 904 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 905 | +; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc |
| 906 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
| 907 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l |
| 908 | +; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 909 | +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0] |
| 910 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 911 | +; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v1, v0 |
| 912 | +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| 913 | +; |
| 914 | +; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat_other_dep: |
| 915 | +; GFX11-FAKE16: ; %bb.0: ; %bb |
| 916 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 917 | +; GFX11-FAKE16-NEXT: flat_load_u16 v2, v[0:1] offset:2 glc dlc |
| 918 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 919 | +; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc |
| 920 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 921 | +; GFX11-FAKE16-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0] |
| 922 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 923 | +; GFX11-FAKE16-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 |
| 924 | +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
878 | 925 | bb: |
879 | 926 | %gep_lo = getelementptr inbounds i16, ptr addrspace(0) %ptr, i64 1 |
880 | 927 | %load_lo = load volatile i16, ptr addrspace(0) %gep_lo |
|
0 commit comments