Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -676,5 +676,45 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")

// GFX1250 WMMA and SWMMAC builtins.
//
// Each entry is TARGET_BUILTIN(name, prototype, attributes, features). All
// entries in this section use the attribute string "nc" and the feature string
// "gfx1250-insts,wavefrontsize32".
//
// NOTE(review): the prototype strings presumably follow the type-encoding
// legend at the top of clang/include/clang/Basic/Builtins.def -- return type
// first, then one code per parameter, e.g. "V8f" = vector of 8 float,
// "y" = __bf16, "h" = half, "i" = int, "s" = short, "b" = bool, and an "I"
// prefix marking a parameter that must be an integer constant expression.
// Confirm against that legend before adding or changing entries.
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x4_f32, "V8fIbV2fIbV2fIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x32_bf16, "V8fIbV16yIbV16yIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x32_bf16, "V8yIbV16yIbV16yIsV8yIbIb", "nc", "gfx1250-insts,wavefrontsize32")
// In the next entry the leading (return) code is V8y while the sixth parameter
// is V8f; it is the only WMMA entry here whose return code differs from that
// parameter's code.
TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16, "V8yIbV16yIbV16yIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8, "V8fV8iV8iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8, "V8fV8iV8iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8, "V8fV8iV8iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8, "V8fV8iV8iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8, "V8fV16iV16iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8, "V8fV16iV16iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8, "V8fV16iV16iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8, "V8fV16iV16iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x32_f16, "V8fIbV16hIbV16hIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x32_f16, "V8hIbV16hIbV16hIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
// Unlike the other WMMA entries, the f4 prototype below has no trailing "IbIb"
// parameters and its two matrix operands have different widths (V16i, V8i).
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_32x16x128_f4, "V16fV16iV8iIsV16f", "nc", "gfx1250-insts,wavefrontsize32")
// SWMMAC entries: every prototype below carries a plain "i" parameter
// immediately before the trailing "IbIb" pair.
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x64_bf16, "V8fIbV16yIbV32yV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x64_bf16, "V8yIbV16yIbV32yV8yiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16, "V8fIbV16yIbV32yV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8, "V8fV8iV16iV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8, "V8fV8iV16iV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8, "V8fV8iV16iV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8, "V8fV8iV16iV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x128_iu8, "V8iIbV8iIbV16iV8iiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x64_f16, "V8fIbV16hIbV32hV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x64_f16, "V8hIbV16hIbV32hV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")

#undef BUILTIN
#undef TARGET_BUILTIN
199 changes: 198 additions & 1 deletion clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -822,7 +822,46 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
// GFX1250 WMMA and SWMMAC builtins
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {

// These operations perform a matrix multiplication and accumulation of
// the form:
Expand All @@ -837,6 +876,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
// "false".
bool AppendFalseForOpselArg = false;
unsigned BuiltinWMMAOp;
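// Set when the result (D) and accumulator (C) matrices have different types;
// the builtin's return type is then pushed onto ArgTypes ahead of the types
// selected by ArgsForMatchingMatrixTypes.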
bool NeedReturnType = false;

switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
Expand Down Expand Up @@ -975,6 +1016,160 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
break;
// GFX1250 WMMA and SWMMAC builtins
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
ArgsForMatchingMatrixTypes = {5, 1};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
ArgsForMatchingMatrixTypes = {5, 1};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
ArgsForMatchingMatrixTypes = {5, 1};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
ArgsForMatchingMatrixTypes = {5, 1};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
ArgsForMatchingMatrixTypes = {5, 1};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
NeedReturnType = true;
ArgsForMatchingMatrixTypes = {1, 5};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
ArgsForMatchingMatrixTypes = {3, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
ArgsForMatchingMatrixTypes = {4, 1};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
ArgsForMatchingMatrixTypes = {3, 0, 1};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
break;
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
break;
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
break;
case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
break;
case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
break;
case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
break;
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
break;
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
break;
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
break;
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
break;
case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
break;
case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
break;
case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
break;
case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
break;
case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
break;
}

SmallVector<Value *, 6> Args;
Expand All @@ -984,6 +1179,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
Args.push_back(Builder.getFalse());

SmallVector<llvm::Type *, 6> ArgTypes;
if (NeedReturnType)
ArgTypes.push_back(ConvertType(E->getType()));
for (auto ArgIdx : ArgsForMatchingMatrixTypes)
ArgTypes.push_back(Args[ArgIdx]->getType());

Expand Down
Loading
Loading