Skip to content

Commit c74dd30

Browse files
committed
AMDGPU: Add round-to-odd rounding during f64 to bf16 conversion
Remove incorrect f64->bf16 patterns.
1 parent b9bdfea commit c74dd30

File tree

2 files changed

+3
-7
lines changed

2 files changed

+3
-7
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6905,15 +6905,16 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
69056905
}
69066906

69076907
assert(DstVT.getScalarType() == MVT::bf16 &&
6908-
"custom lower FP_ROUND for f16 or bf16");
6908+
"custom lower FP_ROUND for f16 or bf16");
69096909
assert(Subtarget->hasBF16ConversionInsts() && "f32 -> bf16 is legal");
69106910

69116911
// Round-inexact-to-odd f64 to f32, then do the final rounding using the
69126912
// hardware f32 -> bf16 instruction.
69136913
EVT F32VT = SrcVT.isVector() ? SrcVT.changeVectorElementType(MVT::f32) :
69146914
MVT::f32;
69156915
SDValue Rod = expandRoundInexactToOdd(F32VT, Src, DL, DAG);
6916-
return getFPExtOrFPRound(DAG, Rod, DL, DstVT);
6916+
return DAG.getNode(ISD::FP_ROUND, DL, DstVT, Rod,
6917+
DAG.getTargetConstant(0, DL, MVT::i32));
69176918
}
69186919

69196920
SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,16 +1443,11 @@ let SubtargetPredicate = HasBF16ConversionInsts in {
14431443
}
14441444
def : GCNPat<(v2bf16 (bf16_fpround v2f32:$src)),
14451445
(V_CVT_PK_BF16_F32_e64 0, (EXTRACT_SUBREG VReg_64:$src, sub0), 0, (EXTRACT_SUBREG VReg_64:$src, sub1))>;
1446-
def : GCNPat<(v2bf16 (bf16_fpround v2f64:$src)),
1447-
(V_CVT_PK_BF16_F32_e64 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub0_sub1)),
1448-
0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub2_sub3)))>;
14491446
def : GCNPat<(v2bf16 (build_vector (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
14501447
(bf16 (bf16_fpround (f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)))))),
14511448
(V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1)>;
14521449
def : GCNPat<(bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
14531450
(V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, 0, (f32 (IMPLICIT_DEF)))>;
1454-
def : GCNPat<(bf16 (bf16_fpround (f64 (VOP3Mods f64:$src0, i32:$src0_modifiers)))),
1455-
(V_CVT_PK_BF16_F32_e64 0, (f32 (V_CVT_F32_F64_e64 $src0_modifiers, $src0)), 0, (f32 (IMPLICIT_DEF)))>;
14561451
}
14571452

14581453
class Cvt_Scale_Sr_F32ToBF16F16_Pat<SDPatternOperator node, VOP3_Pseudo inst, ValueType DstTy> : GCNPat<

0 commit comments

Comments
 (0)