Skip to content

Commit 4ce3544

Browse files
committed
Update intrinsics comments.
1 parent b9a8b40 commit 4ce3544

File tree

1 file changed

+22
-13
lines changed

1 file changed

+22
-13
lines changed

clang/lib/Headers/amxfp8intrin.h

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,69 +15,78 @@
1515
#define __AMXFP8INTRIN_H
1616
#ifdef __x86_64__
1717

18-
19-
/// Compute dot-product of brain-float8 (BF8) or hybrid-float8 (HF8)
20-
/// floating-point pairs in tiles \a a and \a b, accumulating the
21-
/// intermediate single-precision (32-bit) floating-point elements with
22-
/// elements in \a dst, and store the 32-bit result back to tile \a dst.
18+
/// These instructions compute the dot product of brain-float8 (BF8) or
19+
/// hybrid-float8 (HF8) values, accumulating into single precision (FP32). The input
20+
/// elements can be BF8 or HF8. These instructions have three tile operands, one
21+
/// source/dest accumulator operand, and two source operands, \a a and \a b. The
22+
/// \a a and \a b operands can be BF8 or HF8 independently, and the source/dest
23+
/// operand, \a dst, is always FP32.
2324
///
2425
/// \headerfile <immintrin.h>
2526
///
2627
/// \code
2728
/// void _tile_dpbf8ps (__tile dst, __tile a, __tile b)
2829
/// \endcode
2930
///
30-
/// This intrinsic corresponds to the \c TDPBF8PS instruction.
31+
/// This intrinsic corresponds to the \c TDPBF8PS instruction, which is the dot
32+
/// product of a BF8 value (\a a) by a BF8 value (\a b) accumulating into a
33+
/// single-precision (FP32) source/dest (\a dst).
3134
///
3235
/// \param dst
3336
/// The destination tile. Max size is 1024 Bytes.
3437
/// \param a
3538
/// The 1st source tile. Max size is 1024 Bytes.
3639
/// \param b
3740
/// The 2nd source tile. Max size is 1024 Bytes.
38-
#define _tile_dpbf8ps __builtin_ia32_tdpbf8ps
41+
#define _tile_dpbf8ps(dst, a, b) __builtin_ia32_tdpbf8ps((dst), (a), (b))
3942

4043
/// \code
4144
/// void _tile_dpbhf8ps (__tile dst, __tile a, __tile b)
4245
/// \endcode
4346
///
44-
/// This intrinsic corresponds to the \c TDPBHF8PS instruction.
47+
/// This intrinsic corresponds to the \c TDPBHF8PS instruction, which is the dot
48+
/// product of a BF8 value (\a a) by an HF8 value (\a b) accumulating into a
49+
/// single-precision (FP32) source/dest (\a dst).
4550
///
4651
/// \param dst
4752
/// The destination tile. Max size is 1024 Bytes.
4853
/// \param a
4954
/// The 1st source tile. Max size is 1024 Bytes.
5055
/// \param b
5156
/// The 2nd source tile. Max size is 1024 Bytes.
52-
#define _tile_dpbhf8ps __builtin_ia32_tdpbhf8ps
57+
#define _tile_dpbhf8ps(dst, a, b) __builtin_ia32_tdpbhf8ps((dst), (a), (b))
5358

5459
/// \code
5560
/// void _tile_dphbf8ps (__tile dst, __tile a, __tile b)
5661
/// \endcode
5762
///
58-
/// This intrinsic corresponds to the \c TDPHBF8PS instruction.
63+
/// This intrinsic corresponds to the \c TDPHBF8PS instruction, which is the dot
64+
/// product of an HF8 value (\a a) by a BF8 value (\a b) accumulating into a
65+
/// single-precision (FP32) source/dest (\a dst).
5966
///
6067
/// \param dst
6168
/// The destination tile. Max size is 1024 Bytes.
6269
/// \param a
6370
/// The 1st source tile. Max size is 1024 Bytes.
6471
/// \param b
6572
/// The 2nd source tile. Max size is 1024 Bytes.
66-
#define _tile_dphbf8ps __builtin_ia32_tdphbf8ps
73+
#define _tile_dphbf8ps(dst, a, b) __builtin_ia32_tdphbf8ps((dst), (a), (b))
6774

6875
/// \code
6976
/// void _tile_dphf8ps (__tile dst, __tile a, __tile b)
7077
/// \endcode
7178
///
72-
/// This intrinsic corresponds to the \c TDPHF8PS instruction.
79+
/// This intrinsic corresponds to the \c TDPHF8PS instruction, which is the dot
80+
/// product of an HF8 value (\a a) by an HF8 value (\a b) accumulating into a
81+
/// single-precision (FP32) source/dest (\a dst).
7382
///
7483
/// \param dst
7584
/// The destination tile. Max size is 1024 Bytes.
7685
/// \param a
7786
/// The 1st source tile. Max size is 1024 Bytes.
7887
/// \param b
7988
/// The 2nd source tile. Max size is 1024 Bytes.
80-
#define _tile_dphf8ps __builtin_ia32_tdphf8ps
89+
#define _tile_dphf8ps(dst, a, b) __builtin_ia32_tdphf8ps((dst), (a), (b))
8190

8291
#endif /* __x86_64__ */
8392
#endif /* __AMXFP8INTRIN_H */

0 commit comments

Comments
 (0)