We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a88843a, commit 70ff4e6 (Copy full SHA for 70ff4e6)
ggml/src/ggml-impl.h
@@ -428,17 +428,9 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size);
428
429
// TODO: Determine if inline assembly is faster
430
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
431
- float f;
432
- __asm__ (
433
- "vlvgp %%v0, %1, %1\n"
434
- "vreph %%v0, %%v0, 3\n"
435
- "vcnf %%v0, %%v0, 0, 1\n"
436
- "vclfnh %%v0, %%v0, 2, 0\n"
437
- "ler %0, %%f0\n" :
438
- /* out */ "=f"(f) :
439
- /* in */ "r"(h) :
440
- /* clobber */ "v0", "f0");
441
- return f;
+ uint16x8_t v_h = vec_splats(h);
+ uint16x8_t nnpa_dlf16 = vec_convert_from_fp16(v_h, 0);
+ return vec_extend_to_fp32_hi(nnpa_dlf16, 0)[0];
442
}
443
444
0 commit comments