|
12 | 12 | ; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-FLUSH,GFX9-FLUSH-MAD %s |
13 | 13 | ; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-DENORM,GFX9-DENORM-FASTFMA-MAD %s |
14 | 14 |
|
15 | | -; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-FMAC,GFX9-FLUSH %s |
16 | | -; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx906 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-FMAC,GFX9-DENORM %s |
| 15 | +; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-FLUSH,GFX9-FLUSH-FMAC %s |
| 16 | +; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx906 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-DENORM,GFX9-DENORM-FASTFMA-FMAC %s |
17 | 17 |
|
18 | 18 | ; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx1030 -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX10,GFX10-FLUSH %s |
19 | 19 | ; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx1030 -denormal-fp-math-f32=ieee -mattr=+mad-mac-f32-insts -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX10,GFX10-DENORM %s |
@@ -140,18 +140,31 @@ define amdgpu_kernel void @fmuladd_f32(ptr addrspace(1) %out, ptr addrspace(1) % |
140 | 140 | ; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[8:9] |
141 | 141 | ; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm |
142 | 142 | ; |
143 | | -; GFX9-FMAC-LABEL: fmuladd_f32: |
144 | | -; GFX9-FMAC: ; %bb.0: |
145 | | -; GFX9-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 |
146 | | -; GFX9-FMAC-NEXT: v_mov_b32_e32 v0, 0 |
147 | | -; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
148 | | -; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[10:11] |
149 | | -; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[12:13] |
150 | | -; GFX9-FMAC-NEXT: global_load_dword v3, v0, s[14:15] |
151 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
152 | | -; GFX9-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2 |
153 | | -; GFX9-FMAC-NEXT: global_store_dword v0, v3, s[8:9] |
154 | | -; GFX9-FMAC-NEXT: s_endpgm |
| 143 | +; GFX9-FLUSH-FMAC-LABEL: fmuladd_f32: |
| 144 | +; GFX9-FLUSH-FMAC: ; %bb.0: |
| 145 | +; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 |
| 146 | +; GFX9-FLUSH-FMAC-NEXT: v_mov_b32_e32 v0, 0 |
| 147 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
| 148 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[10:11] |
| 149 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[12:13] |
| 150 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v3, v0, s[14:15] |
| 151 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 152 | +; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2 |
| 153 | +; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v3, s[8:9] |
| 154 | +; GFX9-FLUSH-FMAC-NEXT: s_endpgm |
| 155 | +; |
| 156 | +; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_f32: |
| 157 | +; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0: |
| 158 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 |
| 159 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_mov_b32_e32 v0, 0 |
| 160 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
| 161 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[10:11] |
| 162 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[12:13] |
| 163 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v3, v0, s[14:15] |
| 164 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 165 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2 |
| 166 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v3, s[8:9] |
| 167 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm |
155 | 168 | ; |
156 | 169 | ; GFX10-LABEL: fmuladd_f32: |
157 | 170 | ; GFX10: ; %bb.0: |
@@ -489,20 +502,35 @@ define amdgpu_kernel void @fmul_fadd_contract_f32(ptr addrspace(1) %out, ptr add |
489 | 502 | ; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[8:9] |
490 | 503 | ; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm |
491 | 504 | ; |
492 | | -; GFX9-FMAC-LABEL: fmul_fadd_contract_f32: |
493 | | -; GFX9-FMAC: ; %bb.0: |
494 | | -; GFX9-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 |
495 | | -; GFX9-FMAC-NEXT: v_mov_b32_e32 v0, 0 |
496 | | -; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
497 | | -; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[10:11] glc |
498 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
499 | | -; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[12:13] glc |
500 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
501 | | -; GFX9-FMAC-NEXT: global_load_dword v3, v0, s[14:15] glc |
502 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
503 | | -; GFX9-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2 |
504 | | -; GFX9-FMAC-NEXT: global_store_dword v0, v3, s[8:9] |
505 | | -; GFX9-FMAC-NEXT: s_endpgm |
| 505 | +; GFX9-FLUSH-FMAC-LABEL: fmul_fadd_contract_f32: |
| 506 | +; GFX9-FLUSH-FMAC: ; %bb.0: |
| 507 | +; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 |
| 508 | +; GFX9-FLUSH-FMAC-NEXT: v_mov_b32_e32 v0, 0 |
| 509 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
| 510 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[10:11] glc |
| 511 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 512 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[12:13] glc |
| 513 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 514 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v3, v0, s[14:15] glc |
| 515 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 516 | +; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2 |
| 517 | +; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v3, s[8:9] |
| 518 | +; GFX9-FLUSH-FMAC-NEXT: s_endpgm |
| 519 | +; |
| 520 | +; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmul_fadd_contract_f32: |
| 521 | +; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0: |
| 522 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 |
| 523 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_mov_b32_e32 v0, 0 |
| 524 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
| 525 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[10:11] glc |
| 526 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 527 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[12:13] glc |
| 528 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 529 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v3, v0, s[14:15] glc |
| 530 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 531 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2 |
| 532 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v3, s[8:9] |
| 533 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm |
506 | 534 | ; |
507 | 535 | ; GFX10-LABEL: fmul_fadd_contract_f32: |
508 | 536 | ; GFX10: ; %bb.0: |
@@ -603,18 +631,31 @@ define amdgpu_kernel void @fmuladd_2.0_a_b_f32(ptr addrspace(1) %out, ptr addrsp |
603 | 631 | ; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[0:1] |
604 | 632 | ; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm |
605 | 633 | ; |
606 | | -; GFX9-FMAC-LABEL: fmuladd_2.0_a_b_f32: |
607 | | -; GFX9-FMAC: ; %bb.0: |
608 | | -; GFX9-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
609 | | -; GFX9-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
610 | | -; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
611 | | -; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
612 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
613 | | -; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
614 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
615 | | -; GFX9-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1 |
616 | | -; GFX9-FMAC-NEXT: global_store_dword v0, v2, s[0:1] |
617 | | -; GFX9-FMAC-NEXT: s_endpgm |
| 634 | +; GFX9-FLUSH-FMAC-LABEL: fmuladd_2.0_a_b_f32: |
| 635 | +; GFX9-FLUSH-FMAC: ; %bb.0: |
| 636 | +; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 637 | +; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 638 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
| 639 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
| 640 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 641 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
| 642 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 643 | +; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1 |
| 644 | +; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v2, s[0:1] |
| 645 | +; GFX9-FLUSH-FMAC-NEXT: s_endpgm |
| 646 | +; |
| 647 | +; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_2.0_a_b_f32: |
| 648 | +; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0: |
| 649 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 650 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 651 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
| 652 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
| 653 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 654 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
| 655 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 656 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1 |
| 657 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v2, s[0:1] |
| 658 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm |
618 | 659 | ; |
619 | 660 | ; GFX10-LABEL: fmuladd_2.0_a_b_f32: |
620 | 661 | ; GFX10: ; %bb.0: |
@@ -717,18 +758,31 @@ define amdgpu_kernel void @fmuladd_a_2.0_b_f32(ptr addrspace(1) %out, ptr addrsp |
717 | 758 | ; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[0:1] |
718 | 759 | ; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm |
719 | 760 | ; |
720 | | -; GFX9-FMAC-LABEL: fmuladd_a_2.0_b_f32: |
721 | | -; GFX9-FMAC: ; %bb.0: |
722 | | -; GFX9-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
723 | | -; GFX9-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
724 | | -; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
725 | | -; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
726 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
727 | | -; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
728 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
729 | | -; GFX9-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1 |
730 | | -; GFX9-FMAC-NEXT: global_store_dword v0, v2, s[0:1] |
731 | | -; GFX9-FMAC-NEXT: s_endpgm |
| 761 | +; GFX9-FLUSH-FMAC-LABEL: fmuladd_a_2.0_b_f32: |
| 762 | +; GFX9-FLUSH-FMAC: ; %bb.0: |
| 763 | +; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 764 | +; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 765 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
| 766 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
| 767 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 768 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
| 769 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 770 | +; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1 |
| 771 | +; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v2, s[0:1] |
| 772 | +; GFX9-FLUSH-FMAC-NEXT: s_endpgm |
| 773 | +; |
| 774 | +; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_a_2.0_b_f32: |
| 775 | +; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0: |
| 776 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 777 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 778 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
| 779 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
| 780 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 781 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
| 782 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 783 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1 |
| 784 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v2, s[0:1] |
| 785 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm |
732 | 786 | ; |
733 | 787 | ; GFX10-LABEL: fmuladd_a_2.0_b_f32: |
734 | 788 | ; GFX10: ; %bb.0: |
@@ -1099,18 +1153,31 @@ define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f32(ptr addrspace(1) %out, ptr ad |
1099 | 1153 | ; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[0:1] |
1100 | 1154 | ; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm |
1101 | 1155 | ; |
1102 | | -; GFX9-FMAC-LABEL: fmuladd_neg_2.0_a_b_f32: |
1103 | | -; GFX9-FMAC: ; %bb.0: |
1104 | | -; GFX9-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
1105 | | -; GFX9-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
1106 | | -; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
1107 | | -; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
1108 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
1109 | | -; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
1110 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
1111 | | -; GFX9-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1 |
1112 | | -; GFX9-FMAC-NEXT: global_store_dword v0, v2, s[0:1] |
1113 | | -; GFX9-FMAC-NEXT: s_endpgm |
| 1156 | +; GFX9-FLUSH-FMAC-LABEL: fmuladd_neg_2.0_a_b_f32: |
| 1157 | +; GFX9-FLUSH-FMAC: ; %bb.0: |
| 1158 | +; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 1159 | +; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1160 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
| 1161 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
| 1162 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 1163 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
| 1164 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 1165 | +; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1 |
| 1166 | +; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v2, s[0:1] |
| 1167 | +; GFX9-FLUSH-FMAC-NEXT: s_endpgm |
| 1168 | +; |
| 1169 | +; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_neg_2.0_a_b_f32: |
| 1170 | +; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0: |
| 1171 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 1172 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1173 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
| 1174 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
| 1175 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 1176 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
| 1177 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 1178 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1 |
| 1179 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v2, s[0:1] |
| 1180 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm |
1114 | 1181 | ; |
1115 | 1182 | ; GFX10-LABEL: fmuladd_neg_2.0_a_b_f32: |
1116 | 1183 | ; GFX10: ; %bb.0: |
@@ -1213,18 +1280,31 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(ptr addrspace(1) %out, pt |
1213 | 1280 | ; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[0:1] |
1214 | 1281 | ; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm |
1215 | 1282 | ; |
1216 | | -; GFX9-FMAC-LABEL: fmuladd_neg_2.0_neg_a_b_f32: |
1217 | | -; GFX9-FMAC: ; %bb.0: |
1218 | | -; GFX9-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
1219 | | -; GFX9-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
1220 | | -; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
1221 | | -; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
1222 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
1223 | | -; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
1224 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
1225 | | -; GFX9-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1 |
1226 | | -; GFX9-FMAC-NEXT: global_store_dword v0, v2, s[0:1] |
1227 | | -; GFX9-FMAC-NEXT: s_endpgm |
| 1283 | +; GFX9-FLUSH-FMAC-LABEL: fmuladd_neg_2.0_neg_a_b_f32: |
| 1284 | +; GFX9-FLUSH-FMAC: ; %bb.0: |
| 1285 | +; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 1286 | +; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1287 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
| 1288 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
| 1289 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 1290 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
| 1291 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 1292 | +; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1 |
| 1293 | +; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v2, s[0:1] |
| 1294 | +; GFX9-FLUSH-FMAC-NEXT: s_endpgm |
| 1295 | +; |
| 1296 | +; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_neg_2.0_neg_a_b_f32: |
| 1297 | +; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0: |
| 1298 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 1299 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1300 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
| 1301 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
| 1302 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 1303 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
| 1304 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 1305 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1 |
| 1306 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v2, s[0:1] |
| 1307 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm |
1228 | 1308 | ; |
1229 | 1309 | ; GFX10-LABEL: fmuladd_neg_2.0_neg_a_b_f32: |
1230 | 1310 | ; GFX10: ; %bb.0: |
@@ -1329,18 +1409,31 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr ad |
1329 | 1409 | ; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[0:1] |
1330 | 1410 | ; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm |
1331 | 1411 | ; |
1332 | | -; GFX9-FMAC-LABEL: fmuladd_2.0_neg_a_b_f32: |
1333 | | -; GFX9-FMAC: ; %bb.0: |
1334 | | -; GFX9-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
1335 | | -; GFX9-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
1336 | | -; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
1337 | | -; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
1338 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
1339 | | -; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
1340 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
1341 | | -; GFX9-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1 |
1342 | | -; GFX9-FMAC-NEXT: global_store_dword v0, v2, s[0:1] |
1343 | | -; GFX9-FMAC-NEXT: s_endpgm |
| 1412 | +; GFX9-FLUSH-FMAC-LABEL: fmuladd_2.0_neg_a_b_f32: |
| 1413 | +; GFX9-FLUSH-FMAC: ; %bb.0: |
| 1414 | +; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 1415 | +; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1416 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
| 1417 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
| 1418 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 1419 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
| 1420 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 1421 | +; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1 |
| 1422 | +; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v2, s[0:1] |
| 1423 | +; GFX9-FLUSH-FMAC-NEXT: s_endpgm |
| 1424 | +; |
| 1425 | +; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_2.0_neg_a_b_f32: |
| 1426 | +; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0: |
| 1427 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 1428 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1429 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
| 1430 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
| 1431 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 1432 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
| 1433 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 1434 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1 |
| 1435 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v2, s[0:1] |
| 1436 | +; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm |
1344 | 1437 | ; |
1345 | 1438 | ; GFX10-LABEL: fmuladd_2.0_neg_a_b_f32: |
1346 | 1439 | ; GFX10: ; %bb.0: |
@@ -1445,18 +1538,18 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(ptr addrspace(1) %out, ptr ad |
1445 | 1538 | ; GFX9-DENORM-NEXT: global_store_dword v0, v1, s[0:1] |
1446 | 1539 | ; GFX9-DENORM-NEXT: s_endpgm |
1447 | 1540 | ; |
1448 | | -; GFX9-FMAC-LABEL: fmuladd_2.0_a_neg_b_f32: |
1449 | | -; GFX9-FMAC: ; %bb.0: |
1450 | | -; GFX9-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
1451 | | -; GFX9-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
1452 | | -; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
1453 | | -; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
1454 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
1455 | | -; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
1456 | | -; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0) |
1457 | | -; GFX9-FMAC-NEXT: v_fma_f32 v1, v1, 2.0, -v2 |
1458 | | -; GFX9-FMAC-NEXT: global_store_dword v0, v1, s[0:1] |
1459 | | -; GFX9-FMAC-NEXT: s_endpgm |
| 1541 | +; GFX9-FLUSH-FMAC-LABEL: fmuladd_2.0_a_neg_b_f32: |
| 1542 | +; GFX9-FLUSH-FMAC: ; %bb.0: |
| 1543 | +; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 1544 | +; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1545 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0) |
| 1546 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc |
| 1547 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 1548 | +; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc |
| 1549 | +; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0) |
| 1550 | +; GFX9-FLUSH-FMAC-NEXT: v_fma_f32 v1, v1, 2.0, -v2 |
| 1551 | +; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v1, s[0:1] |
| 1552 | +; GFX9-FLUSH-FMAC-NEXT: s_endpgm |
1460 | 1553 | ; |
1461 | 1554 | ; GFX10-LABEL: fmuladd_2.0_a_neg_b_f32: |
1462 | 1555 | ; GFX10: ; %bb.0: |
@@ -2728,6 +2821,3 @@ define amdgpu_kernel void @fsub_fadd_a_a_c_f32(ptr addrspace(1) %out, ptr addrsp |
2728 | 2821 |
|
2729 | 2822 | attributes #0 = { nounwind } |
2730 | 2823 | attributes #1 = { nounwind readnone } |
2731 | | -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
2732 | | -; GFX9-DENORM-FASTFMA-FMAC: {{.*}} |
2733 | | -; GFX9-FLUSH-FMAC: {{.*}} |
|
0 commit comments