@@ -853,127 +853,6 @@ static ConverterT Fp16_to_Fp8E4M3FNUZ(AMD::ISAFamily isaFamily) {
853853 : Fp16_to_Fp8E4M3FNUZ_SW;
854854}
855855
// Converts four FP8 E4M3 values (v[0]..v[3], inserted as i8 lanes) into four
// bf16 values using integer bit manipulation only; two inputs are handled per
// 32-bit word. Returns the results in the same order as the inputs.
// `loc` and `rewriter` are not named in this body; presumably the builder
// helpers (vec_ty, undef, insert_element, ...) pick them up -- confirm.
856- // WARN: subnormal (0bs0000xxx) are not handled
// NOTE(review): an all-zero input byte also goes through the +0x3c00 step
// below, so it produces the bit pattern 0x3c00 rather than 0 -- confirm that
// callers never rely on exact zeros.
857- static SmallVector<Value> Fp8E4M3_to_Bf16 (Location loc,
858- ConversionPatternRewriter &rewriter,
859- const SmallVector<Value> &v) {
860- auto fp8x4VecTy = vec_ty (i8_ty, 4 );
// Place v[0] and v[1] in byte lanes 1 and 3 with zero bytes in lanes 0 and 2,
// then view the vector as one i32. Assuming lane 0 is the least-significant
// byte (TODO confirm for the target), each fp8 value becomes the high byte of
// a 16-bit half whose low byte is zero.
861- Value a0 = undef (fp8x4VecTy);
862- a0 = insert_element (fp8x4VecTy, a0, int_val (8 , 0 ), i32_val (0 ));
863- a0 = insert_element (fp8x4VecTy, a0, v[0 ], i32_val (1 ));
864- a0 = insert_element (fp8x4VecTy, a0, int_val (8 , 0 ), i32_val (2 ));
865- a0 = insert_element (fp8x4VecTy, a0, v[1 ], i32_val (3 ));
866- a0 = bitcast (a0, i32_ty);
867-
// Same packing for v[2] and v[3].
868- Value a1 = undef (fp8x4VecTy);
869- a1 = insert_element (fp8x4VecTy, a1, int_val (8 , 0 ), i32_val (0 ));
870- a1 = insert_element (fp8x4VecTy, a1, v[2 ], i32_val (1 ));
871- a1 = insert_element (fp8x4VecTy, a1, int_val (8 , 0 ), i32_val (2 ));
872- a1 = insert_element (fp8x4VecTy, a1, v[3 ], i32_val (3 ));
873- a1 = bitcast (a1, i32_ty);
874-
// Clear bit 15 of each 16-bit half (the sign position) and shift the remaining
// bits right by 4. Because the low byte of every half is zero, nothing from the
// upper half leaks into the lower half during the 32-bit shift.
875- Value b0 = and_ (i32_ty, a0, i32_val (0x7fff7fff ));
876- Value b1 = and_ (i32_ty, a1, i32_val (0x7fff7fff ));
877- b0 = lshr (i32_ty, b0, i32_val (4 ));
878- b1 = lshr (i32_ty, b1, i32_val (4 ));
879-
// Add 0x3c00 to each half. 0x3c00 == 120 << 7, i.e. an exponent offset of 120
// in a layout with 7 mantissa bits -- presumably the bias difference between
// bf16 (127) and this fp8 format (7); confirm.
880- b0 = add (i32_ty, b0, i32_val (0x3c003c00 ));
881- b1 = add (i32_ty, b1, i32_val (0x3c003c00 ));
// Pick the sign bits (bit 15 of each half) from the original packed words.
882- Value sign0 = and_ (i32_ty, a0, i32_val (0x80008000 ));
883- Value sign1 = and_ (i32_ty, a1, i32_val (0x80008000 ));
884-
// Recombine sign and magnitude, then reinterpret each i32 as two bf16 lanes.
885- auto bf16x2VecTy = vec_ty (bf16_ty, 2 );
886- Value bf16x2Vec0 = or_ (i32_ty, sign0, b0);
887- Value bf16x2Vec1 = or_ (i32_ty, sign1, b1);
888- bf16x2Vec0 = bitcast (bf16x2Vec0, bf16x2VecTy);
889- bf16x2Vec1 = bitcast (bf16x2Vec1, bf16x2VecTy);
890-
// Results for v[0], v[1], v[2], v[3] in that order.
891- return {extract_element (bf16_ty, bf16x2Vec0, i32_val (0 )),
892- extract_element (bf16_ty, bf16x2Vec0, i32_val (1 )),
893- extract_element (bf16_ty, bf16x2Vec1, i32_val (0 )),
894- extract_element (bf16_ty, bf16x2Vec1, i32_val (1 ))};
895- }
896-
// Converts four bf16 values (v[0]..v[3]) into four FP8 E4M3 bytes using integer
// bit manipulation only; two inputs are handled per 32-bit word. Magnitudes are
// clamped to the bit range [0x3c00, 0x43f0] before narrowing, so every output
// magnitude lies in 0x00..0x7f. Returns four i8 values in input order.
// `loc` and `rewriter` are not named in this body; presumably the builder
// helpers (vec_ty, undef, insert_element, ...) pick them up -- confirm.
897- static SmallVector<Value> Bf16_to_Fp8E4M3 (Location loc,
898- ConversionPatternRewriter &rewriter,
899- const SmallVector<Value> &v) {
// Pack (v[0], v[1]) and (v[2], v[3]) into two 2 x bf16 vectors and view each as
// a single i32.
900- auto bf16x2VecTy = vec_ty (bf16_ty, 2 );
901- Value bf16x2Vec0 = undef (bf16x2VecTy);
902- Value bf16x2Vec1 = undef (bf16x2VecTy);
903- bf16x2Vec0 = insert_element (bf16x2VecTy, bf16x2Vec0, v[0 ], i32_val (0 ));
904- bf16x2Vec0 = insert_element (bf16x2VecTy, bf16x2Vec0, v[1 ], i32_val (1 ));
905- bf16x2Vec1 = insert_element (bf16x2VecTy, bf16x2Vec1, v[2 ], i32_val (0 ));
906- bf16x2Vec1 = insert_element (bf16x2VecTy, bf16x2Vec1, v[3 ], i32_val (1 ));
907- bf16x2Vec0 = bitcast (bf16x2Vec0, i32_ty);
908- bf16x2Vec1 = bitcast (bf16x2Vec1, i32_ty);
909-
// Isolate bit 15 of each 16-bit half (the sign position), then gather byte
// lanes 1 and 3 of each word into one 4 x i8 vector. Assuming lane 0 is the
// least-significant byte (TODO confirm for the target), those are the high
// bytes of the halves, so each gathered byte is 0x80 or 0x00.
910- Value sign0 = and_ (i32_ty, bf16x2Vec0, i32_val (0x80008000 ));
911- Value sign1 = and_ (i32_ty, bf16x2Vec1, i32_val (0x80008000 ));
912- auto fp8x4VecTy = vec_ty (i8_ty, 4 );
913- Value sign = undef (fp8x4VecTy);
914- sign0 = bitcast (sign0, fp8x4VecTy);
915- sign1 = bitcast (sign1, fp8x4VecTy);
916- sign = insert_element (fp8x4VecTy, sign,
917- extract_element (i8_ty, sign0, i32_val (1 )), i32_val (0 ));
918- sign = insert_element (fp8x4VecTy, sign,
919- extract_element (i8_ty, sign0, i32_val (3 )), i32_val (1 ));
920- sign = insert_element (fp8x4VecTy, sign,
921- extract_element (i8_ty, sign1, i32_val (1 )), i32_val (2 ));
922- sign = insert_element (fp8x4VecTy, sign,
923- extract_element (i8_ty, sign1, i32_val (3 )), i32_val (3 ));
924- sign = bitcast (sign, i32_ty);
925-
// Magnitude bits: everything except bit 15 of each half.
926- Value nosign0 = and_ (i32_ty, bf16x2Vec0, i32_val (0x7fff7fff ));
927- Value nosign1 = and_ (i32_ty, bf16x2Vec1, i32_val (0x7fff7fff ));
928-
// Clamp each 16-bit half separately (unsigned) to [0x3c00, 0x43f0]; the upper
// half uses the same bounds shifted left by 16. The lower bound keeps the later
// subtraction from borrowing, and the upper bound maps to 0x7f after narrowing.
// Any larger bit pattern is saturated to the upper bound as well.
929- Value nosign_0_0 = and_ (i32_ty, nosign0, i32_val (0xffff0000 ));
930- nosign_0_0 = umax (i32_ty, nosign_0_0, i32_val (0x3c000000 ));
931- nosign_0_0 = umin (i32_ty, nosign_0_0, i32_val (0x43f00000 ));
932- Value nosign_0_1 = and_ (i32_ty, nosign0, i32_val (0x0000ffff ));
933- nosign_0_1 = umax (i32_ty, nosign_0_1, i32_val (0x3c00 ));
934- nosign_0_1 = umin (i32_ty, nosign_0_1, i32_val (0x43f0 ));
935- nosign0 = or_ (i32_ty, nosign_0_0, nosign_0_1);
936-
// Same per-half clamp for the second word.
937- Value nosign_1_0 = and_ (i32_ty, nosign1, i32_val (0xffff0000 ));
938- nosign_1_0 = umax (i32_ty, nosign_1_0, i32_val (0x3c000000 ));
939- nosign_1_0 = umin (i32_ty, nosign_1_0, i32_val (0x43f00000 ));
940- Value nosign_1_1 = and_ (i32_ty, nosign1, i32_val (0x0000ffff ));
941- nosign_1_1 = umax (i32_ty, nosign_1_1, i32_val (0x3c00 ));
942- nosign_1_1 = umin (i32_ty, nosign_1_1, i32_val (0x43f0 ));
943- nosign1 = or_ (i32_ty, nosign_1_0, nosign_1_1);
944-
// 0x80008 adds 8 to each half: half of the 4 bits dropped by the shift below,
// i.e. round-half-up on the discarded bits. Then remove the 0x3c00 offset
// (120 << 7; presumably the bf16/fp8 exponent-bias difference -- confirm) and
// shift right by 4 so each result fits in the low byte of its half.
945- nosign0 = add (i32_ty, nosign0, i32_val (0x80008 ));
946- nosign1 = add (i32_ty, nosign1, i32_val (0x80008 ));
947- nosign0 = sub (i32_ty, nosign0, i32_val (0x3c003c00 ));
948- nosign1 = sub (i32_ty, nosign1, i32_val (0x3c003c00 ));
949- nosign0 = lshr (i32_ty, nosign0, i32_val (4 ));
950- nosign1 = lshr (i32_ty, nosign1, i32_val (4 ));
951-
// Gather byte lanes 0 and 2 (the low byte of each half). The 32-bit shift moves
// the upper half's low nibble into bits 12..15 of the lower half, but those
// bits sit in lane 1, which is not read here.
952- nosign0 = bitcast (nosign0, fp8x4VecTy);
953- nosign1 = bitcast (nosign1, fp8x4VecTy);
954- Value nosign = undef (fp8x4VecTy);
955- nosign =
956- insert_element (fp8x4VecTy, nosign,
957- extract_element (i8_ty, nosign0, i32_val (0 )), i32_val (0 ));
958- nosign =
959- insert_element (fp8x4VecTy, nosign,
960- extract_element (i8_ty, nosign0, i32_val (2 )), i32_val (1 ));
961- nosign =
962- insert_element (fp8x4VecTy, nosign,
963- extract_element (i8_ty, nosign1, i32_val (0 )), i32_val (2 ));
964- nosign =
965- insert_element (fp8x4VecTy, nosign,
966- extract_element (i8_ty, nosign1, i32_val (2 )), i32_val (3 ));
967- nosign = bitcast (nosign, i32_ty);
968-
// Merge the sign bytes with the magnitude bytes and return the four lanes in
// input order.
969- Value fp8x4Vec = or_ (i32_ty, nosign, sign);
970- fp8x4Vec = bitcast (fp8x4Vec, fp8x4VecTy);
971- return {extract_element (i8_ty, fp8x4Vec, i32_val (0 )),
972- extract_element (i8_ty, fp8x4Vec, i32_val (1 )),
973- extract_element (i8_ty, fp8x4Vec, i32_val (2 )),
974- extract_element (i8_ty, fp8x4Vec, i32_val (3 ))};
975- }
976-
977856template <typename SourceOp, typename DestOp>
978857struct ElementwiseOpConversion
979858 : public ElementwiseOpConversionBase<
0 commit comments