Commit f75860f
authored
[AArch64] Implement NEON FP8 intrinsics for fused multiply-add (#123615)
This patch adds the following intrinsics:
* Fused multiply-add non-indexed
float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t,
mfloat8x16_t, fpm_t)
float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t, mfloat8x16_t,
mfloat8x16_t, fpm_t)
float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t,
mfloat8x16_t, fpm_t)
float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t, mfloat8x16_t,
mfloat8x16_t, fpm_t)
* Floating-point multiply-add long to half-precision (vector, by
element)
float16x8_t vmlalbq_lane_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn,
mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vmlalbq_laneq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn,
mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vmlaltq_lane_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn,
mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vmlaltq_laneq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn,
mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
* Floating-point multiply-add long-long to single-precision (vector, by
element)
float32x4_t vmlallbbq_lane_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn,
mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float32x4_t vmlallbbq_laneq_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn,
mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
float32x4_t vmlallbtq_lane_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn,
mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float32x4_t vmlallbtq_laneq_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn,
mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
float32x4_t vmlalltbq_lane_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn,
mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float32x4_t vmlalltbq_laneq_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn,
mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
float32x4_t vmlallttq_lane_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn,
mfloat8x8_t vm, __builtin_constant_p(lane), fpm_t fpm)
float32x4_t vmlallttq_laneq_f32_mf8_fpm(float32x4_t vd, mfloat8x16_t vn,
mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
1 parent f949f87 commit f75860f
File tree
9 files changed
+717
-44
lines changed
- clang
- include/clang/Basic
- lib/CodeGen
- test
- CodeGen/AArch64/fp8-intrinsics
- Sema/aarch64-fp8-intrinsics
- llvm
- include/llvm/IR
- lib/Target/AArch64
- test/CodeGen/AArch64
9 files changed
+717
-44
lines changed

| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
2161 | 2161 | | |
2162 | 2162 | | |
2163 | 2163 | | |
| 2164 | + | |
| 2165 | + | |
| 2166 | + | |
| 2167 | + | |
| 2168 | + | |
| 2169 | + | |
| 2170 | + | |
| 2171 | + | |
| 2172 | + | |
| 2173 | + | |
| 2174 | + | |
| 2175 | + | |
| 2176 | + | |
| 2177 | + | |
| 2178 | + | |
| 2179 | + | |
| 2180 | + | |
| 2181 | + | |
| 2182 | + | |
| 2183 | + | |
| 2184 | + | |
| 2185 | + | |
| 2186 | + | |
| 2187 | + | |
2164 | 2188 | | |
2165 | 2189 | | |
2166 | 2190 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
6759 | 6759 | | |
6760 | 6760 | | |
6761 | 6761 | | |
6762 | | - | |
| 6762 | + | |
| 6763 | + | |
6763 | 6764 | | |
6764 | | - | |
| 6765 | + | |
| 6766 | + | |
| 6767 | + | |
6765 | 6768 | | |
6766 | | - | |
| 6769 | + | |
6767 | 6770 | | |
6768 | 6771 | | |
6769 | 6772 | | |
6770 | | - | |
| 6773 | + | |
6771 | 6774 | | |
6772 | 6775 | | |
6773 | 6776 | | |
6774 | 6777 | | |
6775 | 6778 | | |
6776 | 6779 | | |
6777 | | - | |
| 6780 | + | |
6778 | 6781 | | |
6779 | 6782 | | |
6780 | 6783 | | |
6781 | 6784 | | |
6782 | | - | |
6783 | | - | |
6784 | | - | |
| 6785 | + | |
| 6786 | + | |
| 6787 | + | |
| 6788 | + | |
| 6789 | + | |
| 6790 | + | |
| 6791 | + | |
| 6792 | + | |
| 6793 | + | |
| 6794 | + | |
| 6795 | + | |
| 6796 | + | |
| 6797 | + | |
| 6798 | + | |
| 6799 | + | |
| 6800 | + | |
6785 | 6801 | | |
6786 | 6802 | | |
6787 | 6803 | | |
| |||
6802 | 6818 | | |
6803 | 6819 | | |
6804 | 6820 | | |
6805 | | - | |
6806 | | - | |
6807 | | - | |
| 6821 | + | |
6808 | 6822 | | |
6809 | 6823 | | |
6810 | 6824 | | |
| |||
12779 | 12793 | | |
12780 | 12794 | | |
12781 | 12795 | | |
12782 | | - | |
| 12796 | + | |
12783 | 12797 | | |
12784 | 12798 | | |
12785 | 12799 | | |
| |||
14054 | 14068 | | |
14055 | 14069 | | |
14056 | 14070 | | |
14057 | | - | |
| 14071 | + | |
14058 | 14072 | | |
14059 | 14073 | | |
14060 | 14074 | | |
14061 | 14075 | | |
14062 | | - | |
| 14076 | + | |
14063 | 14077 | | |
14064 | 14078 | | |
14065 | 14079 | | |
14066 | 14080 | | |
14067 | 14081 | | |
14068 | 14082 | | |
14069 | | - | |
| 14083 | + | |
14070 | 14084 | | |
14071 | 14085 | | |
14072 | 14086 | | |
14073 | 14087 | | |
14074 | | - | |
| 14088 | + | |
| 14089 | + | |
| 14090 | + | |
| 14091 | + | |
| 14092 | + | |
| 14093 | + | |
| 14094 | + | |
| 14095 | + | |
| 14096 | + | |
| 14097 | + | |
| 14098 | + | |
| 14099 | + | |
| 14100 | + | |
| 14101 | + | |
| 14102 | + | |
| 14103 | + | |
| 14104 | + | |
| 14105 | + | |
| 14106 | + | |
| 14107 | + | |
| 14108 | + | |
| 14109 | + | |
| 14110 | + | |
| 14111 | + | |
| 14112 | + | |
| 14113 | + | |
| 14114 | + | |
| 14115 | + | |
| 14116 | + | |
| 14117 | + | |
| 14118 | + | |
| 14119 | + | |
| 14120 | + | |
| 14121 | + | |
| 14122 | + | |
| 14123 | + | |
| 14124 | + | |
| 14125 | + | |
| 14126 | + | |
| 14127 | + | |
| 14128 | + | |
| 14129 | + | |
| 14130 | + | |
| 14131 | + | |
| 14132 | + | |
| 14133 | + | |
| 14134 | + | |
| 14135 | + | |
| 14136 | + | |
| 14137 | + | |
| 14138 | + | |
| 14139 | + | |
| 14140 | + | |
| 14141 | + | |
| 14142 | + | |
| 14143 | + | |
| 14144 | + | |
| 14145 | + | |
| 14146 | + | |
| 14147 | + | |
| 14148 | + | |
| 14149 | + | |
14075 | 14150 | | |
14076 | 14151 | | |
14077 | 14152 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
4692 | 4692 | | |
4693 | 4693 | | |
4694 | 4694 | | |
4695 | | - | |
| 4695 | + | |
4696 | 4696 | | |
4697 | | - | |
| 4697 | + | |
4698 | 4698 | | |
4699 | 4699 | | |
4700 | 4700 | | |
4701 | 4701 | | |
4702 | | - | |
| 4702 | + | |
| 4703 | + | |
| 4704 | + | |
| 4705 | + | |
| 4706 | + | |
4703 | 4707 | | |
4704 | 4708 | | |
4705 | 4709 | | |
| |||
0 commit comments