Skip to content

Commit 55c08fe

Browse files
Made the sparse amdgcn_mma structs match the actual builtin signature (A vector size halved)
Signed-off-by: Chris Tsiaousis <chris.tsiaousis@streamhpc.com>
1 parent 95d06ed commit 55c08fe

File tree

2 files changed

+8
-33
lines changed

2 files changed

+8
-33
lines changed

projects/composablekernel/include/ck_tile/core/arch/mma/sparse/mfma/sparse_gfx9.hpp

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,8 @@ struct amdgcn_mma<
4040
using OpType = MfmaOp;
4141
static constexpr MmaOpFamily OpFamily = MmaOpFamily::SPARSE;
4242

43-
static constexpr index_t ABVecN = 8;
44-
45-
using AVecType = ext_vector_t<fp16_t, ABVecN>;
46-
using BVecType = ext_vector_t<fp16_t, ABVecN>;
43+
using AVecType = ext_vector_t<fp16_t, 4>;
44+
using BVecType = ext_vector_t<fp16_t, 8>;
4745
using CVecType = ext_vector_t<fp32_t, 4>;
4846

4947
static constexpr index_t kAMBlock = 1;
@@ -62,22 +60,12 @@ struct amdgcn_mma<
6260
static constexpr index_t kCompressionRatio = 2;
6361

6462
CK_TILE_DEVICE static auto
65-
exec(AVecType& aVec, BVecType const& bVec, CVecType const& cVec) -> CVecType
63+
exec(AVecType const& aVec, BVecType const& bVec, CVecType const& cVec, int32_t idx) -> CVecType
6664
{
67-
static constexpr index_t CompressedSize = ABVecN / kCompressionRatio;
68-
using AVecCompressed = ext_vector_t<fp16_t, CompressedSize>;
69-
static_assert(CompressedSize == 4);
70-
// TODO: Compressing A on-the-fly should be OK for now, but we need to validate
71-
// and evaluate changing this to a transform at a higher level.
72-
// aVec not being const can cause problems when running multiple intrinsics.
73-
const int32_t idx = ck_tile::compress_a_impl<fp16_t, CompressedSize>(aVec);
74-
75-
const AVecCompressed a_vec_pruned = {aVec[0], aVec[1], aVec[2], aVec[3]};
76-
7765
using namespace sparse::detail;
7866
static constexpr BuiltinParams PARAMS = getBuiltinParams<CtrlFlags::CompressionIndex>();
7967
return {__builtin_amdgcn_smfmac_f32_16x16x32_f16(
80-
a_vec_pruned, bVec, cVec, idx, PARAMS.UseFirstIndex, PARAMS.ByteIndexToOverride)};
68+
aVec, bVec, cVec, idx, PARAMS.UseFirstIndex, PARAMS.ByteIndexToOverride)};
8169
}
8270
};
8371

projects/composablekernel/include/ck_tile/core/arch/mma/sparse/wmma/sparse_gfx12.hpp

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,8 @@ struct amdgcn_mma<fp16_t,
2929
using OpType = WmmaOp;
3030
static constexpr MmaOpFamily OpFamily = MmaOpFamily::SPARSE;
3131

32-
static constexpr index_t ABVecN = 16;
33-
34-
using AVecType = ext_vector_t<fp16_t, ABVecN>;
35-
using BVecType = ext_vector_t<fp16_t, ABVecN>;
32+
using AVecType = ext_vector_t<fp16_t, 8>;
33+
using BVecType = ext_vector_t<fp16_t, 16>;
3634
using CVecType = ext_vector_t<fp32_t, 8>;
3735

3836
static constexpr index_t kAMBlock = 1;
@@ -51,20 +49,9 @@ struct amdgcn_mma<fp16_t,
5149
static constexpr index_t kCompressionRatio = 2;
5250

5351
CK_TILE_DEVICE static auto
54-
exec(AVecType& aVec, BVecType const& bVec, CVecType const& cVec) -> CVecType
52+
exec(AVecType const& aVec, BVecType const& bVec, CVecType const& cVec, int32_t idx) -> CVecType
5553
{
56-
static constexpr index_t CompressedSize = ABVecN / kCompressionRatio;
57-
using AVecCompressed = ext_vector_t<fp16_t, CompressedSize>;
58-
static_assert(CompressedSize == 8);
59-
// TODO: Compressing A on-the-fly should be OK for now, but we need to validate
60-
// and evaluate changing this to a transform at a higher level.
61-
// aVec not being const can cause problems when running multiple intrinsics.
62-
const int32_t idx = ::ck_tile::compress_a_impl<fp16_t, CompressedSize>(aVec);
63-
64-
const AVecCompressed a_vec_pruned = {
65-
aVec[0], aVec[1], aVec[2], aVec[3], aVec[4], aVec[5], aVec[6], aVec[7]};
66-
67-
return {__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32(a_vec_pruned, bVec, cVec, idx)};
54+
return {__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32(aVec, bVec, cVec, idx)};
6855
}
6956
};
7057

0 commit comments

Comments
 (0)