Skip to content

Commit 3f99c0c

Browse files
committed
Add testing coverage - part I
1 parent 8ca54e6 commit 3f99c0c

File tree

7 files changed

+1414
-4
lines changed

7 files changed

+1414
-4
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6628,10 +6628,12 @@ SDValue SITargetLowering::splitUnaryVectorOp(SDValue Op,
66286628
SelectionDAG &DAG) const {
66296629
unsigned Opc = Op.getOpcode();
66306630
EVT VT = Op.getValueType();
6631-
assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
6632-
VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 ||
6633-
VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
6634-
VT == MVT::v32f32 || VT == MVT::v32i16 || VT == MVT::v32f16);
6631+
assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16 ||
6632+
VT == MVT::v4f32 || VT == MVT::v8i16 || VT == MVT::v8f16 ||
6633+
VT == MVT::v8bf16 || VT == MVT::v16i16 || VT == MVT::v16f16 ||
6634+
VT == MVT::v16bf16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
6635+
VT == MVT::v32f32 || VT == MVT::v32i16 || VT == MVT::v32f16 ||
6636+
VT == MVT::v32bf16);
66356637

66366638
auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
66376639

@@ -6686,6 +6688,7 @@ SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op,
66866688
SelectionDAG &DAG) const {
66876689
unsigned Opc = Op.getOpcode();
66886690
EVT VT = Op.getValueType();
66896692
assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 ||
66906693
VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v16i16 ||
66916694
VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||

llvm/test/Analysis/CostModel/AMDGPU/fadd.ll

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF16,GFX90A-FASTF64 %s
33
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32,FASTF16,FASTF64 %s
44
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32,SLOWF64 %s
5+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s
56
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF16-SIZE,GFX90A-FASTF64-SIZE %s
67
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32-SIZE,FASTF16-SIZE %s
78
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32-SIZE,SLOWF64-SIZE %s
9+
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250-SIZE %s
810
; END.
911

1012
define amdgpu_kernel void @fadd_f32() #0 {
@@ -158,4 +160,33 @@ define amdgpu_kernel void @fadd_f16() #0 {
158160
ret void
159161
}
160162

163+
define amdgpu_kernel void @fadd_bf16() #0 {
164+
; GFX1250-LABEL: 'fadd_bf16'
165+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bf16 = fadd bfloat undef, undef
166+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bf16 = fadd <2 x bfloat> undef, undef
167+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3bf16 = fadd <3 x bfloat> undef, undef
168+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bf16 = fadd <4 x bfloat> undef, undef
169+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5bf16 = fadd <5 x bfloat> undef, undef
170+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16bf16 = fadd <16 x bfloat> undef, undef
171+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v17bf16 = fadd <17 x bfloat> undef, undef
172+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
173+
; GFX1250-SIZE-LABEL: 'fadd_bf16'
174+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = fadd bfloat undef, undef
175+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2bf16 = fadd <2 x bfloat> undef, undef
176+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3bf16 = fadd <3 x bfloat> undef, undef
177+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = fadd <4 x bfloat> undef, undef
178+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5bf16 = fadd <5 x bfloat> undef, undef
179+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16bf16 = fadd <16 x bfloat> undef, undef
180+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17bf16 = fadd <17 x bfloat> undef, undef
181+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
182+
%bf16 = fadd bfloat undef, undef
183+
%v2bf16 = fadd <2 x bfloat> undef, undef
184+
%v3bf16 = fadd <3 x bfloat> undef, undef
185+
%v4bf16 = fadd <4 x bfloat> undef, undef
186+
%v5bf16 = fadd <5 x bfloat> undef, undef
187+
%v16bf16 = fadd <16 x bfloat> undef, undef
188+
%v17bf16 = fadd <17 x bfloat> undef, undef
189+
ret void
190+
}
191+
161192
attributes #0 = { nounwind }

llvm/test/Analysis/CostModel/AMDGPU/fma.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefix=FAST %s
33
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST %s
44
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s
5+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s
56

67
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefix=FAST-SIZE %s
78
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefix=FAST-SIZE %s
89
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefix=SLOW-SIZE %s
10+
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250-SIZE %s
911

1012

1113
define void @fma_f16() {
@@ -100,6 +102,24 @@ define void @fma_bf16() {
100102
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v17bf16 = call <17 x bfloat> @llvm.fma.v17bf16(<17 x bfloat> undef, <17 x bfloat> undef, <17 x bfloat> undef)
101103
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
102104
;
105+
; GFX1250-LABEL: 'fma_bf16'
106+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bf16 = call bfloat @llvm.fma.bf16(bfloat undef, bfloat undef, bfloat undef)
107+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bf16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x bfloat> undef)
108+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3bf16 = call <3 x bfloat> @llvm.fma.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef, <3 x bfloat> undef)
109+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bf16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x bfloat> undef)
110+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5bf16 = call <5 x bfloat> @llvm.fma.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef, <5 x bfloat> undef)
111+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16bf16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x bfloat> undef)
112+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v17bf16 = call <17 x bfloat> @llvm.fma.v17bf16(<17 x bfloat> undef, <17 x bfloat> undef, <17 x bfloat> undef)
113+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
114+
; GFX1250-SIZE-LABEL: 'fma_bf16'
115+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bf16 = call bfloat @llvm.fma.bf16(bfloat undef, bfloat undef, bfloat undef)
116+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bf16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x bfloat> undef)
117+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3bf16 = call <3 x bfloat> @llvm.fma.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef, <3 x bfloat> undef)
118+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bf16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x bfloat> undef)
119+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5bf16 = call <5 x bfloat> @llvm.fma.v5bf16(<5 x bfloat> undef, <5 x bfloat> undef, <5 x bfloat> undef)
120+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16bf16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x bfloat> undef)
121+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v17bf16 = call <17 x bfloat> @llvm.fma.v17bf16(<17 x bfloat> undef, <17 x bfloat> undef, <17 x bfloat> undef)
122+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
103123
%bf16 = call bfloat @llvm.fma.bf16(bfloat undef, bfloat undef, bfloat undef)
104124
%v2bf16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x bfloat> undef)
105125
%v3bf16 = call <3 x bfloat> @llvm.fma.v3bf16(<3 x bfloat> undef, <3 x bfloat> undef, <3 x bfloat> undef)

llvm/test/Analysis/CostModel/AMDGPU/fmul.ll

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX9,GFX90A-FASTF64 %s
33
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX9,F32,FASTF64 %s
44
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=F32,SLOW %s
5+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s
56
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX9-SIZE,GFX90A-SIZE %s
67
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=SIZE,GFX9-SIZE %s
78
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SIZE,SLOW-SIZE %s
9+
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250-SIZE %s
810
; END.
911

1012
define amdgpu_kernel void @fmul_f32() #0 {
@@ -179,6 +181,16 @@ define amdgpu_kernel void @fmul_bf16() #0 {
179181
; SLOW-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17bf16 = fmul <17 x bfloat> undef, undef
180182
; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
181183
;
184+
; GFX1250-LABEL: 'fmul_bf16'
185+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bf16 = fmul bfloat undef, undef
186+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bf16 = fmul <2 x bfloat> undef, undef
187+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3bf16 = fmul <3 x bfloat> undef, undef
188+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bf16 = fmul <4 x bfloat> undef, undef
189+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5bf16 = fmul <5 x bfloat> undef, undef
190+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16bf16 = fmul <16 x bfloat> undef, undef
191+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v17bf16 = fmul <17 x bfloat> undef, undef
192+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
193+
;
182194
; GFX9-SIZE-LABEL: 'fmul_bf16'
183195
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = fmul bfloat undef, undef
184196
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2bf16 = fmul <2 x bfloat> undef, undef
@@ -199,6 +211,15 @@ define amdgpu_kernel void @fmul_bf16() #0 {
199211
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17bf16 = fmul <17 x bfloat> undef, undef
200212
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
201213
;
214+
; GFX1250-SIZE-LABEL: 'fmul_bf16'
215+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = fmul bfloat undef, undef
216+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2bf16 = fmul <2 x bfloat> undef, undef
217+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3bf16 = fmul <3 x bfloat> undef, undef
218+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = fmul <4 x bfloat> undef, undef
219+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5bf16 = fmul <5 x bfloat> undef, undef
220+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16bf16 = fmul <16 x bfloat> undef, undef
221+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17bf16 = fmul <17 x bfloat> undef, undef
222+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
202223
%bf16 = fmul bfloat undef, undef
203224
%v2bf16 = fmul <2 x bfloat> undef, undef
204225
%v3bf16 = fmul <3 x bfloat> undef, undef

llvm/test/Analysis/CostModel/AMDGPU/fsub.ll

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF16,GFX90A-FASTF64 %s
33
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32,FASTF16,FASTF64 %s
44
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32,SLOWF64 %s
5+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s
56
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF16-SIZE,GFX90A-FASTF64-SIZE %s
67
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32-SIZE,FASTF16-SIZE %s
78
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32-SIZE,SLOWF64-SIZE %s
9+
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250-SIZE %s
810
; END.
911

1012
define amdgpu_kernel void @fsub_f32() #0 {
@@ -157,3 +159,34 @@ define amdgpu_kernel void @fsub_f16() #0 {
157159
%v17f16 = fsub <17 x half> undef, undef
158160
ret void
159161
}
162+
163+
define amdgpu_kernel void @fsub_bf16() #0 {
164+
; GFX1250-LABEL: 'fsub_bf16'
165+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bf16 = fsub bfloat undef, undef
166+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2bf16 = fsub <2 x bfloat> undef, undef
167+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3bf16 = fsub <3 x bfloat> undef, undef
168+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4bf16 = fsub <4 x bfloat> undef, undef
169+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v5bf16 = fsub <5 x bfloat> undef, undef
170+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16bf16 = fsub <16 x bfloat> undef, undef
171+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v17bf16 = fsub <17 x bfloat> undef, undef
172+
; GFX1250-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
173+
;
174+
; GFX1250-SIZE-LABEL: 'fsub_bf16'
175+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = fsub bfloat undef, undef
176+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2bf16 = fsub <2 x bfloat> undef, undef
177+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3bf16 = fsub <3 x bfloat> undef, undef
178+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = fsub <4 x bfloat> undef, undef
179+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5bf16 = fsub <5 x bfloat> undef, undef
180+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16bf16 = fsub <16 x bfloat> undef, undef
181+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17bf16 = fsub <17 x bfloat> undef, undef
182+
; GFX1250-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
183+
;
184+
%bf16 = fsub bfloat undef, undef
185+
%v2bf16 = fsub <2 x bfloat> undef, undef
186+
%v3bf16 = fsub <3 x bfloat> undef, undef
187+
%v4bf16 = fsub <4 x bfloat> undef, undef
188+
%v5bf16 = fsub <5 x bfloat> undef, undef
189+
%v16bf16 = fsub <16 x bfloat> undef, undef
190+
%v17bf16 = fsub <17 x bfloat> undef, undef
191+
ret void
192+
}

llvm/test/CodeGen/AMDGPU/bf16.ll

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49641,6 +49641,7 @@ define <3 x bfloat> @v_fma_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b, <3 x bfloat>
4964149641
; GFX11FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
4964249642
; GFX11FAKE16-NEXT: v_alignbit_b32 v1, s0, v3, 16
4964349643
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
4964449645
;
4964549646
; GFX1250-LABEL: v_fma_v3bf16:
4964649647
; GFX1250: ; %bb.0:
@@ -49649,6 +49650,15 @@ define <3 x bfloat> @v_fma_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b, <3 x bfloat>
4964949650
; GFX1250-NEXT: v_pk_fma_bf16 v0, v0, v2, v4
4965049651
; GFX1250-NEXT: v_pk_fma_bf16 v1, v1, v3, v5
4965149652
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
4965249662
%op = call <3 x bfloat> @llvm.fma.v3bf16(<3 x bfloat> %a, <3 x bfloat> %b, <3 x bfloat> %c)
4965349663
ret <3 x bfloat> %op
4965449664
}

0 commit comments

Comments
 (0)