Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion ggml/src/ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -428,20 +428,46 @@ ggml_bf16_t ggml_fp32_to_bf16(float x) {
}

void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
for (int64_t i = 0; i < n; i++) {
int i = 0;
#if defined(__F16C__)
for (; i + 7 < n; i += 8) {
__m128i x_i = _mm_loadu_si128((__m128i *)(x + i));
__m256 y_v = _mm256_cvtph_ps(x_i);
_mm256_storeu_ps(y + i, y_v);
}
#endif
for (; i < n; i++) {
y[i] = GGML_FP16_TO_FP32(x[i]);
}
}

void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
int i = 0;
#if defined(__F16C__)
for (; i + 7 < n; i += 8) {
__m256 x_v = _mm256_loadu_ps(x + i);
__m128i y_v = _mm256_cvtps_ph(x_v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
_mm_storeu_si128((__m128i *)(y + i), y_v);
}
#endif
for (; i < n; ++i) {
y[i] = GGML_FP32_TO_FP16(x[i]);
}
}

void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
int i = 0;
#if defined(__AVX512F__)
for (; i + 15 < n; i += 16) {
__m512 y_v = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_cvtepu16_epi32(_mm256_loadu_si256((__m256i *)(x + i))), 16));
_mm512_storeu_ps(y + i, y_v);
}
#elif defined(__AVX2__)
for (; i + 7 < n; i += 8) {
__m256 y_v = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(x + i))), 16));
_mm256_storeu_ps(y + i, y_v);
}
#endif
for (; i < n; ++i) {
y[i] = GGML_BF16_TO_FP32(x[i]);
}
Expand Down
Loading