|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
2 | 2 | ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s |
3 | | -; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s |
| 3 | +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s |
4 | 4 |
|
5 | 5 | ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s |
6 | | -; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s |
| 6 | +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s |
7 | 7 |
|
8 | 8 | ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-SDAG %s |
9 | | -; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-GISEL %s |
| 9 | +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-GISEL %s |
10 | 10 |
|
11 | 11 | ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s |
12 | 12 | ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s |
13 | | -; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s |
14 | | -; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s |
| 13 | +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s |
| 14 | +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s |
15 | 15 |
|
16 | 16 | ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s |
17 | | -; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s |
| 17 | +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s |
18 | 18 |
|
19 | 19 | ; FIXME: promotion not handled without f16 insts |
20 | 20 |
|
@@ -627,25 +627,41 @@ define amdgpu_ps <2 x half> @s_constained_fmul_v2f16_fpexcept_strict(<2 x half> |
627 | 627 | ; |
628 | 628 | ; GFX8-GISEL-LABEL: s_constained_fmul_v2f16_fpexcept_strict: |
629 | 629 | ; GFX8-GISEL: ; %bb.0: |
630 | | -; GFX8-GISEL-NEXT: s_lshr_b32 s0, s2, 16 |
631 | | -; GFX8-GISEL-NEXT: s_lshr_b32 s1, s3, 16 |
632 | 630 | ; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s3 |
633 | | -; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1 |
634 | | -; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0 |
| 631 | +; GFX8-GISEL-NEXT: s_lshr_b32 s1, s3, 16 |
635 | 632 | ; GFX8-GISEL-NEXT: v_mul_f16_e32 v0, s2, v0 |
636 | | -; GFX8-GISEL-NEXT: v_mul_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
637 | | -; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| 633 | +; GFX8-GISEL-NEXT: s_lshr_b32 s0, s2, 16 |
| 634 | +; GFX8-GISEL-NEXT: v_readfirstlane_b32 s2, v0 |
| 635 | +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s1 |
| 636 | +; GFX8-GISEL-NEXT: v_mul_f16_e32 v0, s0, v0 |
| 637 | +; GFX8-GISEL-NEXT: v_readfirstlane_b32 s0, v0 |
| 638 | +; GFX8-GISEL-NEXT: s_and_b32 s0, 0xffff, s0 |
| 639 | +; GFX8-GISEL-NEXT: s_and_b32 s1, 0xffff, s2 |
| 640 | +; GFX8-GISEL-NEXT: s_lshl_b32 s0, s0, 16 |
| 641 | +; GFX8-GISEL-NEXT: s_or_b32 s0, s1, s0 |
| 642 | +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0 |
638 | 643 | ; GFX8-GISEL-NEXT: ; return to shader part epilog |
639 | 644 | ; |
640 | 645 | ; GFX10PLUS-LABEL: s_constained_fmul_v2f16_fpexcept_strict: |
641 | 646 | ; GFX10PLUS: ; %bb.0: |
642 | 647 | ; GFX10PLUS-NEXT: v_pk_mul_f16 v0, s2, s3 |
643 | 648 | ; GFX10PLUS-NEXT: ; return to shader part epilog |
644 | 649 | ; |
645 | | -; GFX12-LABEL: s_constained_fmul_v2f16_fpexcept_strict: |
646 | | -; GFX12: ; %bb.0: |
647 | | -; GFX12-NEXT: v_pk_mul_f16 v0, s2, s3 |
648 | | -; GFX12-NEXT: ; return to shader part epilog |
| 650 | +; GFX12-SDAG-LABEL: s_constained_fmul_v2f16_fpexcept_strict: |
| 651 | +; GFX12-SDAG: ; %bb.0: |
| 652 | +; GFX12-SDAG-NEXT: v_pk_mul_f16 v0, s2, s3 |
| 653 | +; GFX12-SDAG-NEXT: ; return to shader part epilog |
| 654 | +; |
| 655 | +; GFX12-GISEL-LABEL: s_constained_fmul_v2f16_fpexcept_strict: |
| 656 | +; GFX12-GISEL: ; %bb.0: |
| 657 | +; GFX12-GISEL-NEXT: s_lshr_b32 s0, s2, 16 |
| 658 | +; GFX12-GISEL-NEXT: s_lshr_b32 s1, s3, 16 |
| 659 | +; GFX12-GISEL-NEXT: s_mul_f16 s2, s2, s3 |
| 660 | +; GFX12-GISEL-NEXT: s_mul_f16 s0, s0, s1 |
| 661 | +; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| 662 | +; GFX12-GISEL-NEXT: s_pack_ll_b32_b16 s0, s2, s0 |
| 663 | +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 |
| 664 | +; GFX12-GISEL-NEXT: ; return to shader part epilog |
649 | 665 | %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") |
650 | 666 | ret <2 x half> %val |
651 | 667 | } |
|
0 commit comments