@@ -822,7 +822,46 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
822822 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
823823 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
824824 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
825- case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
825+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
826+ // GFX1250 WMMA builtins
827+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
828+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
829+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
830+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
831+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
832+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
833+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
834+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
835+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
836+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
837+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
838+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
839+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
840+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
841+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
842+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
843+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
844+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
845+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
846+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
847+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
848+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
849+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
850+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
851+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
852+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
853+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
854+ case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
855+ case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
856+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
857+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
858+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
859+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
860+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
861+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
862+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
863+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
864+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
826865
827866 // These operations perform a matrix multiplication and accumulation of
828867 // the form:
@@ -837,6 +876,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
837876 // "false".
838877 bool AppendFalseForOpselArg = false ;
839878 unsigned BuiltinWMMAOp;
879+ // Set when D and C have different types: the builtin's return type is then added to ArgTypes ahead of the matrix argument types.
880+ bool NeedReturnType = false ;
840881
841882 switch (BuiltinID) {
842883 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
@@ -975,6 +1016,160 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
9751016 ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 }; // CD, A, B, Index
9761017 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
9771018 break ;
1019+ // GFX1250 WMMA builtins
1020+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1021+ ArgsForMatchingMatrixTypes = {5 , 1 };
1022+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
1023+ break ;
1024+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1025+ ArgsForMatchingMatrixTypes = {5 , 1 };
1026+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
1027+ break ;
1028+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1029+ ArgsForMatchingMatrixTypes = {5 , 1 };
1030+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
1031+ break ;
1032+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1033+ ArgsForMatchingMatrixTypes = {5 , 1 };
1034+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
1035+ break ;
1036+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1037+ ArgsForMatchingMatrixTypes = {5 , 1 };
1038+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
1039+ break ;
1040+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1041+ NeedReturnType = true ;
1042+ ArgsForMatchingMatrixTypes = {1 , 5 };
1043+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
1044+ break ;
1045+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1046+ ArgsForMatchingMatrixTypes = {3 , 0 };
1047+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
1048+ break ;
1049+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1050+ ArgsForMatchingMatrixTypes = {3 , 0 };
1051+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
1052+ break ;
1053+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1054+ ArgsForMatchingMatrixTypes = {3 , 0 };
1055+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
1056+ break ;
1057+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1058+ ArgsForMatchingMatrixTypes = {3 , 0 };
1059+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
1060+ break ;
1061+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1062+ ArgsForMatchingMatrixTypes = {3 , 0 };
1063+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
1064+ break ;
1065+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1066+ ArgsForMatchingMatrixTypes = {3 , 0 };
1067+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
1068+ break ;
1069+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1070+ ArgsForMatchingMatrixTypes = {3 , 0 };
1071+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
1072+ break ;
1073+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1074+ ArgsForMatchingMatrixTypes = {3 , 0 };
1075+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
1076+ break ;
1077+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1078+ ArgsForMatchingMatrixTypes = {3 , 0 };
1079+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
1080+ break ;
1081+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1082+ ArgsForMatchingMatrixTypes = {3 , 0 };
1083+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
1084+ break ;
1085+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1086+ ArgsForMatchingMatrixTypes = {3 , 0 };
1087+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
1088+ break ;
1089+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1090+ ArgsForMatchingMatrixTypes = {3 , 0 };
1091+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
1092+ break ;
1093+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1094+ ArgsForMatchingMatrixTypes = {3 , 0 };
1095+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
1096+ break ;
1097+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1098+ ArgsForMatchingMatrixTypes = {3 , 0 };
1099+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
1100+ break ;
1101+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1102+ ArgsForMatchingMatrixTypes = {3 , 0 };
1103+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
1104+ break ;
1105+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1106+ ArgsForMatchingMatrixTypes = {3 , 0 };
1107+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
1108+ break ;
1109+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1110+ ArgsForMatchingMatrixTypes = {4 , 1 };
1111+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
1112+ break ;
1113+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1114+ ArgsForMatchingMatrixTypes = {3 , 0 , 1 };
1115+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
1116+ break ;
1117+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1118+ ArgsForMatchingMatrixTypes = {4 , 1 , 3 , 5 };
1119+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
1120+ break ;
1121+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1122+ ArgsForMatchingMatrixTypes = {4 , 1 , 3 , 5 };
1123+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
1124+ break ;
1125+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1126+ ArgsForMatchingMatrixTypes = {4 , 1 , 3 , 5 };
1127+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
1128+ break ;
1129+ case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1130+ ArgsForMatchingMatrixTypes = {4 , 1 , 3 , 5 };
1131+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
1132+ break ;
1133+ case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1134+ ArgsForMatchingMatrixTypes = {4 , 1 , 3 , 5 };
1135+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
1136+ break ;
1137+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1138+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1139+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
1140+ break ;
1141+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1142+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1143+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
1144+ break ;
1145+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1146+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1147+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
1148+ break ;
1149+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1150+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1151+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
1152+ break ;
1153+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1154+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1155+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
1156+ break ;
1157+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1158+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1159+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
1160+ break ;
1161+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1162+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1163+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
1164+ break ;
1165+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1166+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1167+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
1168+ break ;
1169+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
1170+ ArgsForMatchingMatrixTypes = {4 , 1 , 3 , 5 };
1171+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
1172+ break ;
9781173 }
9791174
9801175 SmallVector<Value *, 6 > Args;
@@ -984,6 +1179,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
9841179 Args.push_back (Builder.getFalse ());
9851180
9861181 SmallVector<llvm::Type *, 6 > ArgTypes;
1182+ if (NeedReturnType)
1183+ ArgTypes.push_back (ConvertType (E->getType ()));
9871184 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
9881185 ArgTypes.push_back (Args[ArgIdx]->getType ());
9891186
0 commit comments