Skip to content

Commit 8bad4c6

Browse files
committed
[AMDGPU] Legalize fminimum and fmaximum for gfx950
Original patches by Matthew Arsenault.
1 parent 6babd63 commit 8bad4c6

File tree

7 files changed

+930
-964
lines changed

7 files changed

+930
-964
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,22 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
861861
if (Subtarget->hasIEEEMinMax()) {
862862
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM},
863863
{MVT::f16, MVT::f32, MVT::f64, MVT::v2f16}, Legal);
864+
} else {
865+
// FIXME: For nnan fmaximum, emit the fmaximum3 instead of fmaxnum
866+
if (Subtarget->hasMinimum3Maximum3F32())
867+
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
868+
869+
if (Subtarget->hasMinimum3Maximum3PKF16()) {
870+
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::v2f16, Legal);
871+
872+
// If only the vector form is available, we need to widen to a vector.
873+
if (!Subtarget->hasMinimum3Maximum3F16())
874+
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
875+
}
876+
}
877+
878+
if (Subtarget->hasVOP3PInsts()) {
879+
// We want to break these into v2f16 pieces, not scalarize.
864880
setOperationAction({ISD::FMINIMUM, ISD::FMAXIMUM},
865881
{MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
866882
Custom);

llvm/test/Analysis/CostModel/AMDGPU/maximum.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,19 @@ define void @maximum_f16() {
1111
; GFX950-FASTF64-LABEL: 'maximum_f16'
1212
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.maximum.f16(half undef, half undef)
1313
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
14-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
15-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
16-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
17-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
14+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
15+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
16+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
17+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
1818
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
1919
;
2020
; GFX9-LABEL: 'maximum_f16'
2121
; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f16 = call half @llvm.maximum.f16(half undef, half undef)
2222
; GFX9-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
23-
; GFX9-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
24-
; GFX9-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
25-
; GFX9-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
26-
; GFX9-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
23+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
24+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
25+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
26+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
2727
; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
2828
;
2929
; SLOWF64-LABEL: 'maximum_f16'
@@ -38,10 +38,10 @@ define void @maximum_f16() {
3838
; GFX9-SIZE-LABEL: 'maximum_f16'
3939
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.maximum.f16(half undef, half undef)
4040
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
41-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
42-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
43-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
44-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
41+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
42+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
43+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
44+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
4545
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
4646
;
4747
; SLOW-SIZE-LABEL: 'maximum_f16'

llvm/test/Analysis/CostModel/AMDGPU/minimum.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,19 @@ define void @minimum_f16() {
1111
; GFX950-FASTF64-LABEL: 'minimum_f16'
1212
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.minimum.f16(half undef, half undef)
1313
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
14-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
15-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
16-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
17-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
14+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
15+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
16+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
17+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
1818
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
1919
;
2020
; GFX9-LABEL: 'minimum_f16'
2121
; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f16 = call half @llvm.minimum.f16(half undef, half undef)
2222
; GFX9-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
23-
; GFX9-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
24-
; GFX9-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
25-
; GFX9-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
26-
; GFX9-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
23+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
24+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
25+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
26+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
2727
; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
2828
;
2929
; SLOWF64-LABEL: 'minimum_f16'
@@ -38,10 +38,10 @@ define void @minimum_f16() {
3838
; GFX9-SIZE-LABEL: 'minimum_f16'
3939
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.minimum.f16(half undef, half undef)
4040
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
41-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
42-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
43-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
44-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
41+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
42+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
43+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
44+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
4545
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
4646
;
4747
; SLOW-SIZE-LABEL: 'minimum_f16'

0 commit comments

Comments
 (0)