@@ -80,7 +80,7 @@ namespace {
8080// Define commonly used chipset versions for convenience.
8181constexpr Chipset kGfx908 = Chipset(9 , 0 , 8 );
8282constexpr Chipset kGfx90a = Chipset(9 , 0 , 0xa );
83- constexpr Chipset kGfx940 = Chipset(9 , 4 , 0 );
83+ constexpr Chipset kGfx942 = Chipset(9 , 4 , 2 );
8484
8585/// Define lowering patterns for raw buffer ops
8686template <typename GpuOp, typename Intrinsic>
@@ -483,7 +483,7 @@ static std::optional<StringRef> mfmaOpToIntrinsic(MFMAOp mfma,
483483 destElem = destType.getElementType ();
484484
485485 if (sourceElem.isF32 () && destElem.isF32 ()) {
486- if (mfma.getReducePrecision () && chipset >= kGfx940 ) {
486+ if (mfma.getReducePrecision () && chipset >= kGfx942 ) {
487487 if (m == 32 && n == 32 && k == 4 && b == 1 )
488488 return ROCDL::mfma_f32_32x32x4_xf32::getOperationName ();
489489 if (m == 16 && n == 16 && k == 8 && b == 1 )
@@ -551,9 +551,9 @@ static std::optional<StringRef> mfmaOpToIntrinsic(MFMAOp mfma,
551551 return ROCDL::mfma_i32_32x32x8i8::getOperationName ();
552552 if (m == 16 && n == 16 && k == 16 && b == 1 )
553553 return ROCDL::mfma_i32_16x16x16i8::getOperationName ();
554- if (m == 32 && n == 32 && k == 16 && b == 1 && chipset >= kGfx940 )
554+ if (m == 32 && n == 32 && k == 16 && b == 1 && chipset >= kGfx942 )
555555 return ROCDL::mfma_i32_32x32x16_i8::getOperationName ();
556- if (m == 16 && n == 16 && k == 32 && b == 1 && chipset >= kGfx940 )
556+ if (m == 16 && n == 16 && k == 32 && b == 1 && chipset >= kGfx942 )
557557 return ROCDL::mfma_i32_16x16x32_i8::getOperationName ();
558558 }
559559
@@ -565,7 +565,7 @@ static std::optional<StringRef> mfmaOpToIntrinsic(MFMAOp mfma,
565565 }
566566
567567 if (isa<Float8E5M2FNUZType>(sourceElem) && destElem.isF32 () &&
568- chipset >= kGfx940 ) {
568+ chipset >= kGfx942 ) {
569569 // Known to be correct because there are no scalar f8 instructions and
570570 // because a length mismatch will have been caught by the verifier.
571571 Type sourceBElem =
@@ -585,7 +585,7 @@ static std::optional<StringRef> mfmaOpToIntrinsic(MFMAOp mfma,
585585 }
586586
587587 if (isa<Float8E4M3FNUZType>(sourceElem) && destElem.isF32 () &&
588- chipset >= kGfx940 ) {
588+ chipset >= kGfx942 ) {
589589 Type sourceBElem =
590590 cast<VectorType>(mfma.getSourceB ().getType ()).getElementType ();
591591 if (m == 16 && n == 16 && k == 32 && b == 1 ) {
@@ -653,8 +653,8 @@ struct MFMAOpLowering : public ConvertOpToLLVMPattern<MFMAOp> {
653653 return op->emitOpError (" MFMA only supported on gfx908+" );
654654 uint32_t getBlgpField = static_cast <uint32_t >(op.getBlgp ());
655655 if (op.getNegateA () || op.getNegateB () || op.getNegateC ()) {
656- if (chipset < kGfx940 )
657- return op.emitOpError (" negation unsupported on older than gfx940 " );
656+ if (chipset < kGfx942 )
657+ return op.emitOpError (" negation unsupported on older than gfx942 " );
658658 getBlgpField |=
659659 op.getNegateA () | (op.getNegateB () << 1 ) | (op.getNegateC () << 2 );
660660 }
@@ -775,7 +775,7 @@ LogicalResult ExtPackedFp8OpLowering::matchAndRewrite(
775775 ExtPackedFp8Op op, ExtPackedFp8OpAdaptor adaptor,
776776 ConversionPatternRewriter &rewriter) const {
777777 Location loc = op.getLoc ();
778- if (chipset.majorVersion != 9 || chipset < kGfx940 )
778+ if (chipset.majorVersion != 9 || chipset < kGfx942 )
779779 return rewriter.notifyMatchFailure (
780780 loc, " Fp8 conversion instructions are not available on target "
781781 " architecture and their emulation is not implemented" );
@@ -819,7 +819,7 @@ LogicalResult PackedTrunc2xFp8OpLowering::matchAndRewrite(
819819 PackedTrunc2xFp8Op op, PackedTrunc2xFp8OpAdaptor adaptor,
820820 ConversionPatternRewriter &rewriter) const {
821821 Location loc = op.getLoc ();
822- if (chipset.majorVersion != 9 || chipset < kGfx940 )
822+ if (chipset.majorVersion != 9 || chipset < kGfx942 )
823823 return rewriter.notifyMatchFailure (
824824 loc, " Fp8 conversion instructions are not available on target "
825825 " architecture and their emulation is not implemented" );
@@ -856,7 +856,7 @@ LogicalResult PackedStochRoundFp8OpLowering::matchAndRewrite(
856856 PackedStochRoundFp8Op op, PackedStochRoundFp8OpAdaptor adaptor,
857857 ConversionPatternRewriter &rewriter) const {
858858 Location loc = op.getLoc ();
859- if (chipset.majorVersion != 9 || chipset < kGfx940 )
859+ if (chipset.majorVersion != 9 || chipset < kGfx942 )
860860 return rewriter.notifyMatchFailure (
861861 loc, " Fp8 conversion instructions are not available on target "
862862 " architecture and their emulation is not implemented" );
0 commit comments