|
5 | 5 | ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s
|
6 | 6 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-SDAG %s
|
7 | 7 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-GISEL %s
|
8 |
| -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG %s |
9 |
| -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL %s |
| 8 | +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s |
| 9 | +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s |
| 10 | +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s |
| 11 | +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s |
10 | 12 |
|
11 | 13 | define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
|
12 | 14 | ; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
|
@@ -7531,19 +7533,61 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %o
|
7531 | 7533 | ; GFX9-NEXT: global_store_short v0, v1, s[0:1]
|
7532 | 7534 | ; GFX9-NEXT: s_endpgm
|
7533 | 7535 | ;
|
7534 |
| -; GFX11-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: |
7535 |
| -; GFX11: ; %bb.0: |
7536 |
| -; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
7537 |
| -; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
7538 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) |
7539 |
| -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
7540 |
| -; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
7541 |
| -; GFX11-NEXT: global_load_u16 v1, v0, s[2:3] |
7542 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) |
7543 |
| -; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1 |
7544 |
| -; GFX11-NEXT: v_med3_f16 v1, v1, 2.0, 4.0 |
7545 |
| -; GFX11-NEXT: global_store_b16 v0, v1, s[0:1] |
7546 |
| -; GFX11-NEXT: s_endpgm |
| 7536 | +; GFX11-SDAG-FAKE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: |
| 7537 | +; GFX11-SDAG-FAKE16: ; %bb.0: |
| 7538 | +; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 7539 | +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 7540 | +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) |
| 7541 | +; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 7542 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 7543 | +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] |
| 7544 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 7545 | +; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 |
| 7546 | +; GFX11-SDAG-FAKE16-NEXT: v_med3_f16 v1, v1, 2.0, 4.0 |
| 7547 | +; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] |
| 7548 | +; GFX11-SDAG-FAKE16-NEXT: s_endpgm |
| 7549 | +; |
| 7550 | +; GFX11-GISEL-FAKE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: |
| 7551 | +; GFX11-GISEL-FAKE16: ; %bb.0: |
| 7552 | +; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 7553 | +; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 7554 | +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) |
| 7555 | +; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 7556 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 7557 | +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] |
| 7558 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 7559 | +; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 |
| 7560 | +; GFX11-GISEL-FAKE16-NEXT: v_med3_f16 v1, v1, 2.0, 4.0 |
| 7561 | +; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] |
| 7562 | +; GFX11-GISEL-FAKE16-NEXT: s_endpgm |
| 7563 | +; |
| 7564 | +; GFX11-SDAG-TRUE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: |
| 7565 | +; GFX11-SDAG-TRUE16: ; %bb.0: |
| 7566 | +; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 7567 | +; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 7568 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) |
| 7569 | +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 |
| 7570 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 7571 | +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] |
| 7572 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 7573 | +; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l |
| 7574 | +; GFX11-SDAG-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, 2.0, 4.0 |
| 7575 | +; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] |
| 7576 | +; GFX11-SDAG-TRUE16-NEXT: s_endpgm |
| 7577 | +; |
| 7578 | +; GFX11-GISEL-TRUE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: |
| 7579 | +; GFX11-GISEL-TRUE16: ; %bb.0: |
| 7580 | +; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 7581 | +; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 7582 | +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) |
| 7583 | +; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 |
| 7584 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 7585 | +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] |
| 7586 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 7587 | +; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l |
| 7588 | +; GFX11-GISEL-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, 2.0, 4.0 |
| 7589 | +; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] |
| 7590 | +; GFX11-GISEL-TRUE16-NEXT: s_endpgm |
7547 | 7591 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
7548 | 7592 | %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid
|
7549 | 7593 | %outgep = getelementptr half, ptr addrspace(1) %out, i32 %tid
|
@@ -7723,26 +7767,92 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, pt
|
7723 | 7767 | ; GFX9-NEXT: global_store_short v0, v1, s[8:9]
|
7724 | 7768 | ; GFX9-NEXT: s_endpgm
|
7725 | 7769 | ;
|
7726 |
| -; GFX11-LABEL: v_nnan_inputs_med3_f16_pat0: |
7727 |
| -; GFX11: ; %bb.0: |
7728 |
| -; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 |
7729 |
| -; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
7730 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
7731 |
| -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
7732 |
| -; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
7733 |
| -; GFX11-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc |
7734 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) |
7735 |
| -; GFX11-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc |
7736 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) |
7737 |
| -; GFX11-NEXT: global_load_u16 v3, v0, s[6:7] glc dlc |
7738 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) |
7739 |
| -; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1 |
7740 |
| -; GFX11-NEXT: v_add_f16_e32 v2, 2.0, v2 |
7741 |
| -; GFX11-NEXT: v_add_f16_e32 v3, 4.0, v3 |
7742 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
7743 |
| -; GFX11-NEXT: v_med3_f16 v1, v1, v2, v3 |
7744 |
| -; GFX11-NEXT: global_store_b16 v0, v1, s[0:1] |
7745 |
| -; GFX11-NEXT: s_endpgm |
| 7770 | +; GFX11-SDAG-FAKE16-LABEL: v_nnan_inputs_med3_f16_pat0: |
| 7771 | +; GFX11-SDAG-FAKE16: ; %bb.0: |
| 7772 | +; GFX11-SDAG-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 |
| 7773 | +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 7774 | +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 7775 | +; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 7776 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 7777 | +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc |
| 7778 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 7779 | +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc |
| 7780 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 7781 | +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v3, v0, s[6:7] glc dlc |
| 7782 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 7783 | +; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 |
| 7784 | +; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v2, 2.0, v2 |
| 7785 | +; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v3, 4.0, v3 |
| 7786 | +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 7787 | +; GFX11-SDAG-FAKE16-NEXT: v_med3_f16 v1, v1, v2, v3 |
| 7788 | +; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] |
| 7789 | +; GFX11-SDAG-FAKE16-NEXT: s_endpgm |
| 7790 | +; |
| 7791 | +; GFX11-GISEL-FAKE16-LABEL: v_nnan_inputs_med3_f16_pat0: |
| 7792 | +; GFX11-GISEL-FAKE16: ; %bb.0: |
| 7793 | +; GFX11-GISEL-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 |
| 7794 | +; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 7795 | +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 7796 | +; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 7797 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 7798 | +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc |
| 7799 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 7800 | +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc |
| 7801 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 7802 | +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v3, v0, s[6:7] glc dlc |
| 7803 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 7804 | +; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 |
| 7805 | +; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v2, 2.0, v2 |
| 7806 | +; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v3, 4.0, v3 |
| 7807 | +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 7808 | +; GFX11-GISEL-FAKE16-NEXT: v_med3_f16 v1, v1, v2, v3 |
| 7809 | +; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] |
| 7810 | +; GFX11-GISEL-FAKE16-NEXT: s_endpgm |
| 7811 | +; |
| 7812 | +; GFX11-SDAG-TRUE16-LABEL: v_nnan_inputs_med3_f16_pat0: |
| 7813 | +; GFX11-SDAG-TRUE16: ; %bb.0: |
| 7814 | +; GFX11-SDAG-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 |
| 7815 | +; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 7816 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 7817 | +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 |
| 7818 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 7819 | +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v2, s[2:3] glc dlc |
| 7820 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 7821 | +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v1, v2, s[4:5] glc dlc |
| 7822 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 7823 | +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v3, v2, s[6:7] glc dlc |
| 7824 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 7825 | +; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l |
| 7826 | +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l |
| 7827 | +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l |
| 7828 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| 7829 | +; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h |
| 7830 | +; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v1.l, 4.0, v1.l |
| 7831 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 7832 | +; GFX11-SDAG-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, v0.h, v1.l |
| 7833 | +; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] |
| 7834 | +; GFX11-SDAG-TRUE16-NEXT: s_endpgm |
| 7835 | +; |
| 7836 | +; GFX11-GISEL-TRUE16-LABEL: v_nnan_inputs_med3_f16_pat0: |
| 7837 | +; GFX11-GISEL-TRUE16: ; %bb.0: |
| 7838 | +; GFX11-GISEL-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 |
| 7839 | +; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 7840 | +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 7841 | +; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 |
| 7842 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 7843 | +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v0, v2, s[2:3] glc dlc |
| 7844 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 7845 | +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v1, v2, s[4:5] glc dlc |
| 7846 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 7847 | +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v3, v2, s[6:7] glc dlc |
| 7848 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 7849 | +; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l |
| 7850 | +; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.h, 2.0, v1.l |
| 7851 | +; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v1.l, 4.0, v3.l |
| 7852 | +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 7853 | +; GFX11-GISEL-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, v0.h, v1.l |
| 7854 | +; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] |
| 7855 | +; GFX11-GISEL-TRUE16-NEXT: s_endpgm |
7746 | 7856 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
7747 | 7857 | %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid
|
7748 | 7858 | %gep1 = getelementptr half, ptr addrspace(1) %bptr, i32 %tid
|
|
0 commit comments