55; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906
66; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-CONTRACT
77; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DENORM-CONTRACT
8+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -mattr="+dot7-insts,-dot10-insts" -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DOT10-DISABLED
89; (fadd (fmul S1.x, S2.x), (fadd (fmul S1.y, S2.y), z)) -> (fdot2 S1, S2, z)
910
1011; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions
2122
2223; GFX906-CONTRACT: v_mac_f16_e32
2324; GFX906-DENORM-CONTRACT: v_fma_f16
25+ ; GFX906-DOT10-DISABLED: v_fma_f16
2426define amdgpu_kernel void @dotproduct_f16 (ptr addrspace (1 ) %src1 ,
2527 ptr addrspace (1 ) %src2 ,
2628 ptr addrspace (1 ) nocapture %dst ) {
@@ -44,8 +46,11 @@ entry:
4446}
4547
4648
47- ; We only want to generate fdot2 if vector element of dot product is converted from f16 to f32
48- ; and the vectors are of type <2 x half>
49+ ; We only want to generate fdot2 if:
50+ ; - the vector elements of the dot product are converted from f16 to f32, and
51+ ; - the vectors are of type <2 x half>, and
52+ ; - "dot10-insts" is enabled
53+
4954; GCN-LABEL: {{^}}dotproduct_f16_f32
5055; GFX900: v_mad_mix_f32
5156; GFX900: v_mad_mix_f32
5964; GFX906-CONTRACT: v_dot2_f32_f16
6065
6166; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
67+ ; GFX906-DOT10-DISABLED: v_fma_mix_f32
6268define amdgpu_kernel void @dotproduct_f16_f32 (ptr addrspace (1 ) %src1 ,
6369 ptr addrspace (1 ) %src2 ,
6470 ptr addrspace (1 ) nocapture %dst ) {
@@ -85,8 +91,11 @@ entry:
8591 ret void
8692}
8793
88- ; We only want to generate fdot2 if vector element of dot product is converted from f16 to f32
89- ; and the vectors are of type <2 x half>
94+ ; We only want to generate fdot2 if:
95+ ; - the vector elements of the dot product are converted from f16 to f32, and
96+ ; - the vectors are of type <2 x half>, and
97+ ; - "dot10-insts" is enabled
98+
9099; GCN-LABEL: {{^}}dotproduct_diffvecorder
91100; GFX900: v_mad_mix_f32
92101; GFX900: v_mad_mix_f32
@@ -99,6 +108,7 @@ entry:
99108
100109; GFX906-CONTRACT: v_dot2_f32_f16
101110; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
111+ ; GFX906-DOT10-DISABLED: v_fma_mix_f32
102112define amdgpu_kernel void @dotproduct_diffvecorder (ptr addrspace (1 ) %src1 ,
103113 ptr addrspace (1 ) %src2 ,
104114 ptr addrspace (1 ) nocapture %dst ) {
@@ -136,6 +146,7 @@ entry:
136146
137147; GFX906-CONTRACT: v_fma_mix_f32
138148; GFX906-DENORM-CONTRACT: v_fma_mix_f32
149+ ; GFX906-DOT10-DISABLED: v_fma_mix_f32
139150define amdgpu_kernel void @dotproduct_v4f16 (ptr addrspace (1 ) %src1 ,
140151 ptr addrspace (1 ) %src2 ,
141152 ptr addrspace (1 ) nocapture %dst ) {
@@ -173,6 +184,7 @@ entry:
173184
174185; GFX906-CONTRACT: v_fma_mix_f32
175186; GFX906-DENORM-CONTRACT: v_fma_mix_f32
187+ ; GFX906-DOT10-DISABLED: v_fma_mix_f32
176188define amdgpu_kernel void @NotAdotproduct (ptr addrspace (1 ) %src1 ,
177189 ptr addrspace (1 ) %src2 ,
178190 ptr addrspace (1 ) nocapture %dst ) {
@@ -210,6 +222,7 @@ entry:
210222
211223; GFX906-CONTRACT: v_fma_mix_f32
212224; GFX906-DENORM-CONTRACT: v_fma_mix_f32
225+ ; GFX906-DOT10-DISABLED: v_fma_mix_f32
213226define amdgpu_kernel void @Diff_Idx_NotAdotproduct (ptr addrspace (1 ) %src1 ,
214227 ptr addrspace (1 ) %src2 ,
215228 ptr addrspace (1 ) nocapture %dst ) {
0 commit comments