Skip to content

Commit 5c0d176

Browse files
committed
[LLVM] Custom intrinsics for half-precision ops on x86
1 parent f318c56 commit 5c0d176

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

src/llvm_eval.h

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -16,36 +16,36 @@
1616

1717
/*
 * def_fma_vec_f16_intrinsic(): expands to a fmt_intrinsic() call whose
 * argument is LLVM IR text defining the internal function @fma.v$wf16.
 *
 * The emitted IR widens the three <$w x half> operands %a, %b and %c to
 * <$w x float> with fpext, calls @llvm.fma.v$wf32 on the widened values
 * (the call carries the `fast` flag), and narrows the result back to
 * <$w x half> with fptrunc before returning it.
 *
 * `$w`, `${` and `$}` are template placeholders; presumably fmt_intrinsic
 * substitutes the vector width and literal braces -- TODO confirm against
 * the definition of fmt_intrinsic.
 *
 * In this diff the `fastcc` calling convention is dropped from the
 * definition line, and the `;` after the macro's closing parenthesis is
 * removed.
 *
 * NOTE(review): the added fma definition line has no `local_unnamed_addr`,
 * whereas the minnum/maxnum definition lines added in the same diff do --
 * confirm this difference is intentional.
 */
#define def_fma_vec_f16_intrinsic() \
1818
fmt_intrinsic( \
19-
"define internal fastcc <$w x half> @fma.v$wf16(<$w x half> %a, <$w x half> %b, <$w x half> %c) #0 ${\n" \
19+
"define internal <$w x half> @fma.v$wf16(<$w x half> %a, <$w x half> %b, <$w x half> %c) #0 ${\n" \
2020
" %a_f32 = fpext <$w x half> %a to <$w x float>\n" \
2121
" %b_f32 = fpext <$w x half> %b to <$w x float>\n" \
2222
" %c_f32 = fpext <$w x half> %c to <$w x float>\n" \
2323
" %out_f32 = call fast <$w x float> @llvm.fma.v$wf32(<$w x float> %a_f32, <$w x float> %b_f32, <$w x float> %c_f32)\n" \
2424
" %out = fptrunc <$w x float> %out_f32 to <$w x half>\n" \
2525
" ret <$w x half> %out\n" \
2626
"$}" \
27-
);
27+
)
2828

2929
/*
 * def_minnum_vec_f16_intrinsic(): expands to a fmt_intrinsic() call whose
 * argument is LLVM IR text defining the internal function @minnum.v$wf16.
 *
 * The emitted IR widens the two <$w x half> operands %a and %b to
 * <$w x float> with fpext, calls @llvm.minnum.v$wf32 on the widened values
 * (the call carries the `fast` flag), and narrows the result back to
 * <$w x half> with fptrunc before returning it.
 *
 * `$w`, `${` and `$}` are template placeholders; presumably fmt_intrinsic
 * substitutes the vector width and literal braces -- TODO confirm against
 * the definition of fmt_intrinsic.
 *
 * In this diff the definition line loses `fastcc` and gains
 * `local_unnamed_addr`, and the `;` after the macro's closing parenthesis
 * is removed.
 */
#define def_minnum_vec_f16_intrinsic() \
3030
fmt_intrinsic( \
31-
"define internal fastcc <$w x half> @minnum.v$wf16(<$w x half> %a, <$w x half> %b) #0 ${\n" \
31+
"define internal <$w x half> @minnum.v$wf16(<$w x half> %a, <$w x half> %b) local_unnamed_addr #0 ${\n" \
3232
" %a_f32 = fpext <$w x half> %a to <$w x float>\n" \
3333
" %b_f32 = fpext <$w x half> %b to <$w x float>\n" \
3434
" %out_f32 = call fast <$w x float> @llvm.minnum.v$wf32(<$w x float> %a_f32, <$w x float> %b_f32)\n" \
3535
" %out = fptrunc <$w x float> %out_f32 to <$w x half>\n" \
3636
" ret <$w x half> %out\n" \
3737
"$}" \
38-
);
38+
)
3939

4040
/*
 * def_maxnum_vec_f16_intrinsic(): expands to a fmt_intrinsic() call whose
 * argument is LLVM IR text defining the internal function @maxnum.v$wf16.
 *
 * The emitted IR widens the two <$w x half> operands %a and %b to
 * <$w x float> with fpext, calls @llvm.maxnum.v$wf32 on the widened values
 * (the call carries the `fast` flag), and narrows the result back to
 * <$w x half> with fptrunc before returning it.
 *
 * `$w`, `${` and `$}` are template placeholders; presumably fmt_intrinsic
 * substitutes the vector width and literal braces -- TODO confirm against
 * the definition of fmt_intrinsic.
 *
 * In this diff the definition line loses `fastcc` and gains
 * `local_unnamed_addr`, and the `;` after the macro's closing parenthesis
 * is removed.
 */
#define def_maxnum_vec_f16_intrinsic() \
4141
fmt_intrinsic( \
42-
"define internal fastcc <$w x half> @maxnum.v$wf16(<$w x half> %a, <$w x half> %b) #0 ${\n" \
42+
"define internal <$w x half> @maxnum.v$wf16(<$w x half> %a, <$w x half> %b) local_unnamed_addr #0 ${\n" \
4343
" %a_f32 = fpext <$w x half> %a to <$w x float>\n" \
4444
" %b_f32 = fpext <$w x half> %b to <$w x float>\n" \
4545
" %out_f32 = call fast <$w x float> @llvm.maxnum.v$wf32(<$w x float> %a_f32, <$w x float> %b_f32)\n" \
4646
" %out = fptrunc <$w x float> %out_f32 to <$w x half>\n" \
4747
" ret <$w x half> %out\n" \
4848
"$}" \
49-
);
49+
)
5050

5151
#endif

0 commit comments

Comments
 (0)