@@ -1475,7 +1475,7 @@ void SQ4BitGemmM4Kernel_CompInt8_ScaleFp16_Impl(size_t BlkLen,
14751475 std::byte * QuantBDataPtr = (std::byte *) QuantBData + //
14761476 n * BlockCountK * BlkLen / 2 + // b data
14771477 n * BlockCountK * sizeof (uint8_t ) + // zp
1478- n * BlockCountK * sizeof (__fp16 ); // scale
1478+ n * BlockCountK * sizeof (_Float16 ); // scale
14791479 float * CPtr = C + n;
14801480 if (NBLKS < 16 ) {
14811481 CPtr = tmp;
@@ -1656,7 +1656,7 @@ void SQ4BitGemmM4Kernel_CompInt8_ScaleFp16_Impl(size_t BlkLen,
16561656 size_t NBLKS = (CountN - n) > 16 ? 16 : CountN - n;
16571657 std::byte * QuantBDataPtr = (std::byte *) QuantBData + //
16581658 n * BlockCountK * BlkLen / 2 + // b data
1659- n * BlockCountK * sizeof (__fp16 ); // scale
1659+ n * BlockCountK * sizeof (_Float16 ); // scale
16601660 float * CPtr = C + n;
16611661 if (NBLKS < 16 ) {
16621662 CPtr = tmp;
@@ -2216,7 +2216,7 @@ void SQ4BitGemmM1Kernel_CompInt8_ScaleFp16_Impl(size_t BlkLen,
22162216 std::byte * QuantBDataPtr = (std::byte *) QuantBData + //
22172217 n * BlockCountK * BlkLen / 2 + // b data
22182218 n * BlockCountK * sizeof (uint8_t ) + // zp
2219- n * BlockCountK * sizeof (__fp16 ); // scale
2219+ n * BlockCountK * sizeof (_Float16 ); // scale
22202220 float * CPtr = C + n;
22212221 size_t cnt = BlockCountK;
22222222 if (Bias != nullptr ) {
@@ -2455,7 +2455,7 @@ void SQ4BitGemmM1Kernel_CompInt8_ScaleFp16_Impl(size_t BlkLen,
24552455 size_t nblks = (CountN - n) > 16 ? 16 : CountN - n;
24562456 std::byte * QuantBDataPtr = (std::byte *) QuantBData + //
24572457 n * BlockCountK * BlkLen / 2 + // b data
2458- n * BlockCountK * sizeof (__fp16 ); // scale
2458+ n * BlockCountK * sizeof (_Float16 ); // scale
24592459 float * CPtr = C + n;
24602460 size_t cnt = BlockCountK;
24612461 if (Bias != nullptr ) {
0 commit comments