|
3 | 3 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s |
4 | 4 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX10,GFX10_DEFAULT %s |
5 | 5 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GFX10,FLATSCR_GFX10 %s |
6 | | -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode,+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s |
7 | | -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode,-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s |
8 | | -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch,+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s |
9 | | -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch,-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s |
| 6 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX11 %s |
| 7 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GFX11 %s |
10 | 8 |
|
11 | 9 | define <2 x half> @chain_hi_to_lo_private() { |
12 | 10 | ; GFX900-LABEL: chain_hi_to_lo_private: |
@@ -158,23 +156,14 @@ define <2 x half> @chain_hi_to_lo_arithmatic(ptr addrspace(5) %base, half %in) { |
158 | 156 | ; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v0, v1 |
159 | 157 | ; FLATSCR_GFX10-NEXT: s_setpc_b64 s[30:31] |
160 | 158 | ; |
161 | | -; GFX11-TRUE16-LABEL: chain_hi_to_lo_arithmatic: |
162 | | -; GFX11-TRUE16: ; %bb.0: ; %bb |
163 | | -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
164 | | -; GFX11-TRUE16-NEXT: v_add_f16_e32 v1.l, 1.0, v1.l |
165 | | -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v1, v0, off |
166 | | -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) |
167 | | -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v1 |
168 | | -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
169 | | -; |
170 | | -; GFX11-FAKE16-LABEL: chain_hi_to_lo_arithmatic: |
171 | | -; GFX11-FAKE16: ; %bb.0: ; %bb |
172 | | -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
173 | | -; GFX11-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 |
174 | | -; GFX11-FAKE16-NEXT: scratch_load_d16_hi_b16 v1, v0, off |
175 | | -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
176 | | -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, v1 |
177 | | -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| 159 | +; GFX11-LABEL: chain_hi_to_lo_arithmatic: |
| 160 | +; GFX11: ; %bb.0: ; %bb |
| 161 | +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 162 | +; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1 |
| 163 | +; GFX11-NEXT: scratch_load_d16_hi_b16 v1, v0, off |
| 164 | +; GFX11-NEXT: s_waitcnt vmcnt(0) |
| 165 | +; GFX11-NEXT: v_mov_b32_e32 v0, v1 |
| 166 | +; GFX11-NEXT: s_setpc_b64 s[30:31] |
178 | 167 | bb: |
179 | 168 | %arith_lo = fadd half %in, 1.0 |
180 | 169 | %load_hi = load half, ptr addrspace(5) %base |
@@ -372,31 +361,18 @@ define <2 x half> @chain_hi_to_lo_flat() { |
372 | 361 | ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
373 | 362 | ; GFX10-NEXT: s_setpc_b64 s[30:31] |
374 | 363 | ; |
375 | | -; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat: |
376 | | -; GFX11-TRUE16: ; %bb.0: ; %bb |
377 | | -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
378 | | -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 2 |
379 | | -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 |
380 | | -; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] |
381 | | -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 |
382 | | -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0 |
383 | | -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
384 | | -; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[1:2] |
385 | | -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
386 | | -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
387 | | -; |
388 | | -; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat: |
389 | | -; GFX11-FAKE16: ; %bb.0: ; %bb |
390 | | -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
391 | | -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 2 |
392 | | -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
393 | | -; GFX11-FAKE16-NEXT: flat_load_u16 v0, v[0:1] |
394 | | -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
395 | | -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0 |
396 | | -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
397 | | -; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[1:2] |
398 | | -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
399 | | -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| 364 | +; GFX11-LABEL: chain_hi_to_lo_flat: |
| 365 | +; GFX11: ; %bb.0: ; %bb |
| 366 | +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 367 | +; GFX11-NEXT: v_mov_b32_e32 v0, 2 |
| 368 | +; GFX11-NEXT: v_mov_b32_e32 v1, 0 |
| 369 | +; GFX11-NEXT: flat_load_u16 v0, v[0:1] |
| 370 | +; GFX11-NEXT: v_mov_b32_e32 v1, 0 |
| 371 | +; GFX11-NEXT: v_mov_b32_e32 v2, 0 |
| 372 | +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 373 | +; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[1:2] |
| 374 | +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 375 | +; GFX11-NEXT: s_setpc_b64 s[30:31] |
400 | 376 | bb: |
401 | 377 | %gep_lo = getelementptr inbounds half, ptr null, i64 1 |
402 | 378 | %load_lo = load half, ptr %gep_lo |
@@ -427,23 +403,14 @@ define <2 x half> @chain_hi_to_lo_flat_different_bases(ptr %base_lo, ptr %base_h |
427 | 403 | ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
428 | 404 | ; GFX10-NEXT: s_setpc_b64 s[30:31] |
429 | 405 | ; |
430 | | -; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat_different_bases: |
431 | | -; GFX11-TRUE16: ; %bb.0: ; %bb |
432 | | -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
433 | | -; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] |
434 | | -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
435 | | -; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[2:3] |
436 | | -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
437 | | -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
438 | | -; |
439 | | -; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat_different_bases: |
440 | | -; GFX11-FAKE16: ; %bb.0: ; %bb |
441 | | -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
442 | | -; GFX11-FAKE16-NEXT: flat_load_u16 v0, v[0:1] |
443 | | -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
444 | | -; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[2:3] |
445 | | -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
446 | | -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| 406 | +; GFX11-LABEL: chain_hi_to_lo_flat_different_bases: |
| 407 | +; GFX11: ; %bb.0: ; %bb |
| 408 | +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 409 | +; GFX11-NEXT: flat_load_u16 v0, v[0:1] |
| 410 | +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 411 | +; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[2:3] |
| 412 | +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 413 | +; GFX11-NEXT: s_setpc_b64 s[30:31] |
447 | 414 | bb: |
448 | 415 | %load_lo = load half, ptr %base_lo |
449 | 416 | %load_hi = load half, ptr %base_hi |
@@ -897,31 +864,17 @@ define <2 x i16> @chain_hi_to_lo_flat_other_dep(ptr addrspace(0) %ptr) { |
897 | 864 | ; GFX10-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 |
898 | 865 | ; GFX10-NEXT: s_setpc_b64 s[30:31] |
899 | 866 | ; |
900 | | -; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat_other_dep: |
901 | | -; GFX11-TRUE16: ; %bb.0: ; %bb |
902 | | -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
903 | | -; GFX11-TRUE16-NEXT: flat_load_d16_b16 v2, v[0:1] offset:2 glc dlc |
904 | | -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) |
905 | | -; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc |
906 | | -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
907 | | -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l |
908 | | -; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
909 | | -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0] |
910 | | -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
911 | | -; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v1, v0 |
912 | | -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
913 | | -; |
914 | | -; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat_other_dep: |
915 | | -; GFX11-FAKE16: ; %bb.0: ; %bb |
916 | | -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
917 | | -; GFX11-FAKE16-NEXT: flat_load_u16 v2, v[0:1] offset:2 glc dlc |
918 | | -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
919 | | -; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc |
920 | | -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
921 | | -; GFX11-FAKE16-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0] |
922 | | -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
923 | | -; GFX11-FAKE16-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 |
924 | | -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| 867 | +; GFX11-LABEL: chain_hi_to_lo_flat_other_dep: |
| 868 | +; GFX11: ; %bb.0: ; %bb |
| 869 | +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 870 | +; GFX11-NEXT: flat_load_u16 v2, v[0:1] offset:2 glc dlc |
| 871 | +; GFX11-NEXT: s_waitcnt vmcnt(0) |
| 872 | +; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc |
| 873 | +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 874 | +; GFX11-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0] |
| 875 | +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 876 | +; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 |
| 877 | +; GFX11-NEXT: s_setpc_b64 s[30:31] |
925 | 878 | bb: |
926 | 879 | %gep_lo = getelementptr inbounds i16, ptr addrspace(0) %ptr, i64 1 |
927 | 880 | %load_lo = load volatile i16, ptr addrspace(0) %gep_lo |
|
0 commit comments