Commit f22dfd5
committed
[AArch64] Implement FP8 SVE intrinsics for fused multiply-add
This patch adds the following intrinsics:
* 8-bit floating-point multiply-add long to half-precision (bottom).
// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8FMA) || __ARM_FEATURE_SSVE_FP8FMA
svfloat16_t svmlalb[_f16_mf8]_fpm(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm);
svfloat16_t svmlalb[_n_f16_mf8]_fpm(svfloat16_t zda, svmfloat8_t zn, mfloat8_t zm, fpm_t fpm);
* 8-bit floating-point multiply-add long to half-precision (bottom, indexed).
// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8FMA) || __ARM_FEATURE_SSVE_FP8FMA
svfloat16_t svmlalb_lane[_f16_mf8]_fpm(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm,
uint64_t imm0_15, fpm_t fpm);
* 8-bit floating-point multiply-add long to half-precision (top).
// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8FMA) || __ARM_FEATURE_SSVE_FP8FMA
svfloat16_t svmlalt[_f16_mf8]_fpm(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm);
svfloat16_t svmlalt[_n_f16_mf8]_fpm(svfloat16_t zda, svmfloat8_t zn, mfloat8_t zm, fpm_t fpm);
* 8-bit floating-point multiply-add long to half-precision (top, indexed).
// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8FMA) || __ARM_FEATURE_SSVE_FP8FMA
svfloat16_t svmlalt_lane[_f16_mf8]_fpm(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm,
uint64_t imm0_15, fpm_t fpm);
* 8-bit floating-point multiply-add long long to single-precision (bottom bottom).
// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8FMA) || __ARM_FEATURE_SSVE_FP8FMA
svfloat32_t svmlallbb[_f32_mf8]_fpm(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm);
svfloat32_t svmlallbb[_n_f32_mf8]_fpm(svfloat32_t zda, svmfloat8_t zn, mfloat8_t zm, fpm_t fpm);
* 8-bit floating-point multiply-add long long to single-precision (bottom bottom, indexed).
// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8FMA) || __ARM_FEATURE_SSVE_FP8FMA
svfloat32_t svmlallbb_lane[_f32_mf8]_fpm(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm,
uint64_t imm0_15, fpm_t fpm);
* 8-bit floating-point multiply-add long long to single-precision (bottom top).
// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8FMA) || __ARM_FEATURE_SSVE_FP8FMA
svfloat32_t svmlallbt[_f32_mf8]_fpm(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm);
svfloat32_t svmlallbt[_n_f32_mf8]_fpm(svfloat32_t zda, svmfloat8_t zn, mfloat8_t zm, fpm_t fpm);
* 8-bit floating-point multiply-add long long to single-precision (bottom top, indexed).
// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8FMA) || __ARM_FEATURE_SSVE_FP8FMA
svfloat32_t svmlallbt_lane[_f32_mf8]_fpm(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm,
uint64_t imm0_15, fpm_t fpm);
* 8-bit floating-point multiply-add long long to single-precision (top bottom).
// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8FMA) || __ARM_FEATURE_SSVE_FP8FMA
svfloat32_t svmlalltb[_f32_mf8]_fpm(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm);
svfloat32_t svmlalltb[_n_f32_mf8]_fpm(svfloat32_t zda, svmfloat8_t zn, mfloat8_t zm, fpm_t fpm);
* 8-bit floating-point multiply-add long long to single-precision (top bottom, indexed).
// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8FMA) || __ARM_FEATURE_SSVE_FP8FMA
svfloat32_t svmlalltb_lane[_f32_mf8]_fpm(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm,
uint64_t imm0_15, fpm_t fpm);
* 8-bit floating-point multiply-add long long to single-precision (top top).
// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8FMA) || __ARM_FEATURE_SSVE_FP8FMA
svfloat32_t svmlalltt[_f32_mf8]_fpm(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm);
svfloat32_t svmlalltt[_n_f32_mf8]_fpm(svfloat32_t zda, svmfloat8_t zn, mfloat8_t zm, fpm_t fpm);
* 8-bit floating-point multiply-add long long to single-precision (top top, indexed).
// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8FMA) || __ARM_FEATURE_SSVE_FP8FMA
svfloat32_t svmlalltt_lane[_f32_mf8]_fpm(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm,
uint64_t imm0_15, fpm_t fpm);1 parent cde9701 commit f22dfd5
File tree
7 files changed
+636
-16
lines changed- clang
- include/clang/Basic
- test
- CodeGen/AArch64/fp8-intrinsics
- Sema/aarch64-sve2-intrinsics
- llvm
- include/llvm/IR
- lib/Target/AArch64
- test/CodeGen/AArch64
7 files changed
+636
-16
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
2495 | 2495 | | |
2496 | 2496 | | |
2497 | 2497 | | |
| 2498 | + | |
| 2499 | + | |
| 2500 | + | |
| 2501 | + | |
| 2502 | + | |
| 2503 | + | |
| 2504 | + | |
| 2505 | + | |
| 2506 | + | |
| 2507 | + | |
| 2508 | + | |
| 2509 | + | |
| 2510 | + | |
| 2511 | + | |
| 2512 | + | |
| 2513 | + | |
| 2514 | + | |
| 2515 | + | |
| 2516 | + | |
| 2517 | + | |
| 2518 | + | |
| 2519 | + | |
| 2520 | + | |
| 2521 | + | |
| 2522 | + | |
| 2523 | + | |
| 2524 | + | |
| 2525 | + | |
| 2526 | + | |
| 2527 | + | |
| 2528 | + | |
0 commit comments