@@ -835,10 +835,17 @@ class ROCDL_ConcreteVector<Type elem, int length> :
835
835
836
836
// Concrete fixed-length vector types. Each ROCDL_V<length><elem>Type instantiates ROCDL_ConcreteVector<elem, length>.
def ROCDL_V2I16Type : ROCDL_ConcreteVector<I16, 2>;
837
837
def ROCDL_V2F16Type : ROCDL_ConcreteVector<F16, 2>;
838
+ def ROCDL_V2I32Type : ROCDL_ConcreteVector<I32, 2>; // source operand type of the pk8 Fp8/Bf8 scaled conversions
838
839
def ROCDL_V2BF16Type : ROCDL_ConcreteVector<BF16, 2>;
839
840
def ROCDL_V2F32Type : ROCDL_ConcreteVector<F32, 2>;
841
+ def ROCDL_V3I32Type : ROCDL_ConcreteVector<I32, 3>; // source operand type of the pk16 Fp6/Bf6 scaled conversions
840
842
def ROCDL_V6I32Type : ROCDL_ConcreteVector<I32, 6>;
841
843
def ROCDL_V8I32Type : ROCDL_ConcreteVector<I32, 8>;
844
+ def ROCDL_V8BF16Type : ROCDL_ConcreteVector<BF16, 8>; // 8-element result types of the pk8 scaled conversions
845
+ def ROCDL_V8F16Type : ROCDL_ConcreteVector<F16, 8>;
846
+ def ROCDL_V8F32Type : ROCDL_ConcreteVector<F32, 8>;
847
+ def ROCDL_V16BF16Type : ROCDL_ConcreteVector<BF16, 16>; // 16-element result types of the pk16 scaled conversions
848
+ def ROCDL_V16F16Type : ROCDL_ConcreteVector<F16, 16>;
842
849
def ROCDL_V16F32Type : ROCDL_ConcreteVector<F32, 16>;
843
850
def ROCDL_V32F16Type : ROCDL_ConcreteVector<F16, 32>;
844
851
def ROCDL_V32BF16Type : ROCDL_ConcreteVector<BF16, 32>;
@@ -975,6 +982,68 @@ class ScaleArgInfo<TypeConstraint argTyVal, string typeName> {
975
982
string nameForOp = typeName;
976
983
}
977
984
985
+ //===---------------------------------------------------------------------===//
986
+ // Scaled {fp4,bf8,fp8} to {bf16,f16,f32} conversion intrinsics
987
+ //===---------------------------------------------------------------------===//
988
+
989
+ foreach smallT = [ // source operand type per narrow format: i32 for Fp4, <2 x i32> for Fp8 and Bf8
990
+ ScaleArgInfo<I32, "Fp4">,
991
+ ScaleArgInfo<ROCDL_V2I32Type, "Fp8">,
992
+ ScaleArgInfo<ROCDL_V2I32Type, "Bf8">
993
+ ] in {
994
+ foreach largeT = [ // result type per wide format: always an 8-element vector
995
+ ScaleArgInfo<ROCDL_V8F16Type, "F16">,
996
+ ScaleArgInfo<ROCDL_V8BF16Type, "Bf16">,
997
+ ScaleArgInfo<ROCDL_V8F32Type, "F32">,
998
+ ] in {
999
+ def ROCDL_CvtPkScalePk8 # largeT.nameForOp # smallT.nameForOp # Op : // one op record per (largeT, smallT) pair: 3 x 3 = 9 ops
1000
+ ROCDL_ConcreteNonMemIntrOp<"cvt.scale.pk8." # largeT.name # "." # smallT.name,
1001
+ [Pure], 1, [2], ["scaleSel"]>, // NOTE(review): presumably 1 result, with operand #2 ($scaleSel) passed as an immediate - confirm against ROCDL_ConcreteNonMemIntrOp
1002
+ Arguments<(ins smallT.type:$src, I32:$scale, I32Attr:$scaleSel)> {
1003
+
1004
+ let summary = "Scales 8 " # smallT.name # " and converts them to 8 " # largeT.name # ".";
1005
+ let description = [{
1006
+ Available on gfx1250+.
1007
+ }];
1008
+ let results = (outs largeT.type:$res);
1009
+ let assemblyFormat = [{
1010
+ attr-dict $src `,` $scale `[` $scaleSel `]` `:` type($res)
1011
+ }];
1012
+ }
1013
+ } // foreach largeT
1014
+ } // foreach smallT
1015
+
1016
+ //===---------------------------------------------------------------------===//
1017
+ // Scaled {bf6,fp6} to {bf16,f16,f32} conversion intrinsics
1018
+ //===---------------------------------------------------------------------===//
1019
+
1020
+ foreach smallT = [ // source operand type per narrow format: <3 x i32> for both Fp6 and Bf6
1021
+ ScaleArgInfo<ROCDL_V3I32Type, "Fp6">,
1022
+ ScaleArgInfo<ROCDL_V3I32Type, "Bf6">
1023
+ ] in {
1024
+ foreach largeT = [ // result type per wide format: always a 16-element vector
1025
+ ScaleArgInfo<ROCDL_V16F16Type, "F16">,
1026
+ ScaleArgInfo<ROCDL_V16BF16Type, "Bf16">,
1027
+ ScaleArgInfo<ROCDL_V16F32Type, "F32">,
1028
+ ] in {
1029
+ def ROCDL_CvtPkScalePk16 # largeT.nameForOp # smallT.nameForOp # Op : // one op record per (largeT, smallT) pair: 3 x 2 = 6 ops
1030
+ ROCDL_ConcreteNonMemIntrOp<"cvt.scale.pk16." # largeT.name # "." # smallT.name,
1031
+ [Pure], 1, [2], ["scaleSel"]>, // NOTE(review): presumably 1 result, with operand #2 ($scaleSel) passed as an immediate - confirm against ROCDL_ConcreteNonMemIntrOp
1032
+ Arguments<(ins smallT.type:$src, I32:$scale, I32Attr:$scaleSel)> {
1033
+
1034
+ let summary = "Scales 16 " # smallT.name # " and converts them to 16 " # largeT.name # ".";
1035
+ let description = [{
1036
+ Available on gfx1250+.
1037
+ }];
1038
+ let results = (outs largeT.type:$res);
1039
+ let assemblyFormat = [{
1040
+ attr-dict $src `,` $scale `[` $scaleSel `]` `:` type($res)
1041
+ }];
1042
+
1043
+ }
1044
+ } // foreach largeT
1045
+ } // foreach smallT
1046
+
978
1047
//===---------------------------------------------------------------------===//
979
1048
// Scaled 32x6-bit float conversion intrinsics
980
1049
//===---------------------------------------------------------------------===//
0 commit comments