Skip to content

Commit 42e3a1b

Browse files
committed
Remove duplicate prefix
1 parent 3f6b11d commit 42e3a1b

File tree

1 file changed

+193
-103
lines changed

1 file changed

+193
-103
lines changed

llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll

Lines changed: 193 additions & 103 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,8 @@
12 12
; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-FLUSH,GFX9-FLUSH-MAD %s
13 13
; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-DENORM,GFX9-DENORM-FASTFMA-MAD %s
14 14

15-
; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-FMAC,GFX9-FLUSH %s
16-
; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx906 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-FMAC,GFX9-DENORM %s
15+
; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-FLUSH,GFX9-FLUSH-FMAC %s
16+
; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx906 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-DENORM,GFX9-DENORM-FASTFMA-FMAC %s
17 17

18 18
; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx1030 -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX10,GFX10-FLUSH %s
19 19
; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx1030 -denormal-fp-math-f32=ieee -mattr=+mad-mac-f32-insts -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX10,GFX10-DENORM %s
@@ -140,18 +140,31 @@ define amdgpu_kernel void @fmuladd_f32(ptr addrspace(1) %out, ptr addrspace(1) %
140 140
; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[8:9]
141 141
; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm
142 142
;
143-
; GFX9-FMAC-LABEL: fmuladd_f32:
144-
; GFX9-FMAC: ; %bb.0:
145-
; GFX9-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
146-
; GFX9-FMAC-NEXT: v_mov_b32_e32 v0, 0
147-
; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0)
148-
; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[10:11]
149-
; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[12:13]
150-
; GFX9-FMAC-NEXT: global_load_dword v3, v0, s[14:15]
151-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
152-
; GFX9-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2
153-
; GFX9-FMAC-NEXT: global_store_dword v0, v3, s[8:9]
154-
; GFX9-FMAC-NEXT: s_endpgm
143+
; GFX9-FLUSH-FMAC-LABEL: fmuladd_f32:
144+
; GFX9-FLUSH-FMAC: ; %bb.0:
145+
; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
146+
; GFX9-FLUSH-FMAC-NEXT: v_mov_b32_e32 v0, 0
147+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
148+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[10:11]
149+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[12:13]
150+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v3, v0, s[14:15]
151+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
152+
; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2
153+
; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v3, s[8:9]
154+
; GFX9-FLUSH-FMAC-NEXT: s_endpgm
155+
;
156+
; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_f32:
157+
; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0:
158+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
159+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_mov_b32_e32 v0, 0
160+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0)
161+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[10:11]
162+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[12:13]
163+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v3, v0, s[14:15]
164+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
165+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2
166+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v3, s[8:9]
167+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm
155 168
;
156 169
; GFX10-LABEL: fmuladd_f32:
157 170
; GFX10: ; %bb.0:
@@ -489,20 +502,35 @@ define amdgpu_kernel void @fmul_fadd_contract_f32(ptr addrspace(1) %out, ptr add
489 502
; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[8:9]
490 503
; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm
491 504
;
492-
; GFX9-FMAC-LABEL: fmul_fadd_contract_f32:
493-
; GFX9-FMAC: ; %bb.0:
494-
; GFX9-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
495-
; GFX9-FMAC-NEXT: v_mov_b32_e32 v0, 0
496-
; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0)
497-
; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[10:11] glc
498-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
499-
; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[12:13] glc
500-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
501-
; GFX9-FMAC-NEXT: global_load_dword v3, v0, s[14:15] glc
502-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
503-
; GFX9-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2
504-
; GFX9-FMAC-NEXT: global_store_dword v0, v3, s[8:9]
505-
; GFX9-FMAC-NEXT: s_endpgm
505+
; GFX9-FLUSH-FMAC-LABEL: fmul_fadd_contract_f32:
506+
; GFX9-FLUSH-FMAC: ; %bb.0:
507+
; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
508+
; GFX9-FLUSH-FMAC-NEXT: v_mov_b32_e32 v0, 0
509+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
510+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[10:11] glc
511+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
512+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[12:13] glc
513+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
514+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v3, v0, s[14:15] glc
515+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
516+
; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2
517+
; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v3, s[8:9]
518+
; GFX9-FLUSH-FMAC-NEXT: s_endpgm
519+
;
520+
; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmul_fadd_contract_f32:
521+
; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0:
522+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
523+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_mov_b32_e32 v0, 0
524+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0)
525+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[10:11] glc
526+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
527+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[12:13] glc
528+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
529+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v3, v0, s[14:15] glc
530+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
531+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2
532+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v3, s[8:9]
533+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm
506 534
;
507 535
; GFX10-LABEL: fmul_fadd_contract_f32:
508 536
; GFX10: ; %bb.0:
@@ -603,18 +631,31 @@ define amdgpu_kernel void @fmuladd_2.0_a_b_f32(ptr addrspace(1) %out, ptr addrsp
603 631
; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[0:1]
604 632
; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm
605 633
;
606-
; GFX9-FMAC-LABEL: fmuladd_2.0_a_b_f32:
607-
; GFX9-FMAC: ; %bb.0:
608-
; GFX9-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
609-
; GFX9-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
610-
; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0)
611-
; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
612-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
613-
; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
614-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
615-
; GFX9-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1
616-
; GFX9-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
617-
; GFX9-FMAC-NEXT: s_endpgm
634+
; GFX9-FLUSH-FMAC-LABEL: fmuladd_2.0_a_b_f32:
635+
; GFX9-FLUSH-FMAC: ; %bb.0:
636+
; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
637+
; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
638+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
639+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
640+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
641+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
642+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
643+
; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1
644+
; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
645+
; GFX9-FLUSH-FMAC-NEXT: s_endpgm
646+
;
647+
; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_2.0_a_b_f32:
648+
; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0:
649+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
650+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
651+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0)
652+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
653+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
654+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
655+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
656+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1
657+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
658+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm
618 659
;
619 660
; GFX10-LABEL: fmuladd_2.0_a_b_f32:
620 661
; GFX10: ; %bb.0:
@@ -717,18 +758,31 @@ define amdgpu_kernel void @fmuladd_a_2.0_b_f32(ptr addrspace(1) %out, ptr addrsp
717 758
; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[0:1]
718 759
; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm
719 760
;
720-
; GFX9-FMAC-LABEL: fmuladd_a_2.0_b_f32:
721-
; GFX9-FMAC: ; %bb.0:
722-
; GFX9-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
723-
; GFX9-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
724-
; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0)
725-
; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
726-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
727-
; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
728-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
729-
; GFX9-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1
730-
; GFX9-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
731-
; GFX9-FMAC-NEXT: s_endpgm
761+
; GFX9-FLUSH-FMAC-LABEL: fmuladd_a_2.0_b_f32:
762+
; GFX9-FLUSH-FMAC: ; %bb.0:
763+
; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
764+
; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
765+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
766+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
767+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
768+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
769+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
770+
; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1
771+
; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
772+
; GFX9-FLUSH-FMAC-NEXT: s_endpgm
773+
;
774+
; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_a_2.0_b_f32:
775+
; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0:
776+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
777+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
778+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0)
779+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
780+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
781+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
782+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
783+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1
784+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
785+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm
732 786
;
733 787
; GFX10-LABEL: fmuladd_a_2.0_b_f32:
734 788
; GFX10: ; %bb.0:
@@ -1099,18 +1153,31 @@ define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f32(ptr addrspace(1) %out, ptr ad
1099 1153
; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[0:1]
1100 1154
; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm
1101 1155
;
1102-
; GFX9-FMAC-LABEL: fmuladd_neg_2.0_a_b_f32:
1103-
; GFX9-FMAC: ; %bb.0:
1104-
; GFX9-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1105-
; GFX9-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1106-
; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0)
1107-
; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
1108-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
1109-
; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
1110-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
1111-
; GFX9-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1
1112-
; GFX9-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
1113-
; GFX9-FMAC-NEXT: s_endpgm
1156+
; GFX9-FLUSH-FMAC-LABEL: fmuladd_neg_2.0_a_b_f32:
1157+
; GFX9-FLUSH-FMAC: ; %bb.0:
1158+
; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1159+
; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1160+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
1161+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
1162+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
1163+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
1164+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
1165+
; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1
1166+
; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
1167+
; GFX9-FLUSH-FMAC-NEXT: s_endpgm
1168+
;
1169+
; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_neg_2.0_a_b_f32:
1170+
; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0:
1171+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1172+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1173+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0)
1174+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
1175+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
1176+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
1177+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
1178+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1
1179+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
1180+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm
1114 1181
;
1115 1182
; GFX10-LABEL: fmuladd_neg_2.0_a_b_f32:
1116 1183
; GFX10: ; %bb.0:
@@ -1213,18 +1280,31 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(ptr addrspace(1) %out, pt
1213 1280
; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[0:1]
1214 1281
; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm
1215 1282
;
1216-
; GFX9-FMAC-LABEL: fmuladd_neg_2.0_neg_a_b_f32:
1217-
; GFX9-FMAC: ; %bb.0:
1218-
; GFX9-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1219-
; GFX9-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1220-
; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0)
1221-
; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
1222-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
1223-
; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
1224-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
1225-
; GFX9-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1
1226-
; GFX9-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
1227-
; GFX9-FMAC-NEXT: s_endpgm
1283+
; GFX9-FLUSH-FMAC-LABEL: fmuladd_neg_2.0_neg_a_b_f32:
1284+
; GFX9-FLUSH-FMAC: ; %bb.0:
1285+
; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1286+
; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1287+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
1288+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
1289+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
1290+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
1291+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
1292+
; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1
1293+
; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
1294+
; GFX9-FLUSH-FMAC-NEXT: s_endpgm
1295+
;
1296+
; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_neg_2.0_neg_a_b_f32:
1297+
; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0:
1298+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1299+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1300+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0)
1301+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
1302+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
1303+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
1304+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
1305+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1
1306+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
1307+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm
1228 1308
;
1229 1309
; GFX10-LABEL: fmuladd_neg_2.0_neg_a_b_f32:
1230 1310
; GFX10: ; %bb.0:
@@ -1329,18 +1409,31 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr ad
1329 1409
; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[0:1]
1330 1410
; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm
1331 1411
;
1332-
; GFX9-FMAC-LABEL: fmuladd_2.0_neg_a_b_f32:
1333-
; GFX9-FMAC: ; %bb.0:
1334-
; GFX9-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1335-
; GFX9-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1336-
; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0)
1337-
; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
1338-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
1339-
; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
1340-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
1341-
; GFX9-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1
1342-
; GFX9-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
1343-
; GFX9-FMAC-NEXT: s_endpgm
1412+
; GFX9-FLUSH-FMAC-LABEL: fmuladd_2.0_neg_a_b_f32:
1413+
; GFX9-FLUSH-FMAC: ; %bb.0:
1414+
; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1415+
; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1416+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
1417+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
1418+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
1419+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
1420+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
1421+
; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1
1422+
; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
1423+
; GFX9-FLUSH-FMAC-NEXT: s_endpgm
1424+
;
1425+
; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_2.0_neg_a_b_f32:
1426+
; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0:
1427+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1428+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1429+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0)
1430+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
1431+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
1432+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
1433+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
1434+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1
1435+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
1436+
; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm
1344 1437
;
1345 1438
; GFX10-LABEL: fmuladd_2.0_neg_a_b_f32:
1346 1439
; GFX10: ; %bb.0:
@@ -1445,18 +1538,18 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(ptr addrspace(1) %out, ptr ad
1445 1538
; GFX9-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
1446 1539
; GFX9-DENORM-NEXT: s_endpgm
1447 1540
;
1448-
; GFX9-FMAC-LABEL: fmuladd_2.0_a_neg_b_f32:
1449-
; GFX9-FMAC: ; %bb.0:
1450-
; GFX9-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1451-
; GFX9-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1452-
; GFX9-FMAC-NEXT: s_waitcnt lgkmcnt(0)
1453-
; GFX9-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
1454-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
1455-
; GFX9-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
1456-
; GFX9-FMAC-NEXT: s_waitcnt vmcnt(0)
1457-
; GFX9-FMAC-NEXT: v_fma_f32 v1, v1, 2.0, -v2
1458-
; GFX9-FMAC-NEXT: global_store_dword v0, v1, s[0:1]
1459-
; GFX9-FMAC-NEXT: s_endpgm
1541+
; GFX9-FLUSH-FMAC-LABEL: fmuladd_2.0_a_neg_b_f32:
1542+
; GFX9-FLUSH-FMAC: ; %bb.0:
1543+
; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1544+
; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1545+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
1546+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
1547+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
1548+
; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
1549+
; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
1550+
; GFX9-FLUSH-FMAC-NEXT: v_fma_f32 v1, v1, 2.0, -v2
1551+
; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v1, s[0:1]
1552+
; GFX9-FLUSH-FMAC-NEXT: s_endpgm
1460 1553
;
1461 1554
; GFX10-LABEL: fmuladd_2.0_a_neg_b_f32:
1462 1555
; GFX10: ; %bb.0:
@@ -2728,6 +2821,3 @@ define amdgpu_kernel void @fsub_fadd_a_a_c_f32(ptr addrspace(1) %out, ptr addrsp
2728 2821

2729 2822
attributes #0 = { nounwind }
2730 2823
attributes #1 = { nounwind readnone }
2731-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
2732-
; GFX9-DENORM-FASTFMA-FMAC: {{.*}}
2733-
; GFX9-FLUSH-FMAC: {{.*}}

0 commit comments

Comments
 (0)