header file comment format

BaiXilin · BaiXilin · commit fc78c90ab2bc · 2025-12-06T15:01:37.000-05:00
diff --git a/clang/lib/Headers/avxvnniint16intrin.h b/clang/lib/Headers/avxvnniint16intrin.h
@@ -15,11 +15,11 @@
 #ifndef __AVXVNNIINT16INTRIN_H
 #define __AVXVNNIINT16INTRIN_H
 
-// clang-format off
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
+///    corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W, and store the packed 32-bit
+///    results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -41,21 +41,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 3
 /// 	tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := __W.dword[j] + tmp1 + tmp2
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
-// clang-format on
 #define _mm_dpwsud_epi32(__W, __A, __B)                                        \
   ((__m128i)__builtin_ia32_vpdpwsud128((__v4si)(__W), (__v8hi)(__A),           \
                                        (__v8hu)(__B)))
 
-// clang-format off
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
+///    corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W, and store the packed 32-bit
+///    results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -77,22 +77,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 7
 /// 	tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
-/// 	dst.dword[j] := __W.dword[j] + tmp1 + tmp2
+/// 	tmp2.dword :=
+/// 	  SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	dst.dword[j] := __W.dword[j] + tmp1 + tmp2
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
-// clang-format on
 #define _mm256_dpwsud_epi32(__W, __A, __B)                                     \
   ((__m256i)__builtin_ia32_vpdpwsud256((__v8si)(__W), (__v16hi)(__A),          \
                                        (__v16hu)(__B)))
 
-// clang-format off
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W with signed saturation, and store the packed
-///    32-bit results in \a dst.
+///    corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W with signed saturation, and store
+///    the packed 32-bit results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -114,23 +113,22 @@
 /// \code{.operation}
 /// FOR j := 0 to 3
 /// 	tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
-// clang-format on
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
 #define _mm_dpwsuds_epi32(__W, __A, __B)                                       \
   ((__m128i)__builtin_ia32_vpdpwsuds128((__v4si)(__W), (__v8hi)(__A),          \
                                         (__v8hu)(__B)))
 
-// clang-format off
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W with signed saturation, and store the packed
-///    32-bit results in \a dst.
+///    corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W with signed saturation, and store
+///    the packed 32-bit results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -152,21 +150,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 7
 /// 	tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
-// clang-format on
 #define _mm256_dpwsuds_epi32(__W, __A, __B)                                    \
   ((__m256i)__builtin_ia32_vpdpwsuds256((__v8si)(__W), (__v16hi)(__A),         \
                                         (__v16hu)(__B)))
 
-// clang-format off
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding signed 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding signed 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W, and store the packed 32-bit
+///    results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -188,21 +186,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 3
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := __W.dword[j] + tmp1 + tmp2
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
-// clang-format on
 #define _mm_dpwusd_epi32(__W, __A, __B)                                        \
   ((__m128i)__builtin_ia32_vpdpwusd128((__v4si)(__W), (__v8hu)(__A),           \
                                        (__v8hi)(__B)))
 
-// clang-format off
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding signed 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding signed 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W, and store the packed 32-bit
+///    results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -224,22 +222,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 7
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := __W.dword[j] + tmp1 + tmp2
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
-// clang-format on
 #define _mm256_dpwusd_epi32(__W, __A, __B)                                     \
   ((__m256i)__builtin_ia32_vpdpwusd256((__v8si)(__W), (__v16hu)(__A),          \
                                        (__v16hi)(__B)))
 
-// clang-format off
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding signed 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W with signed saturation, and store the packed
-///    32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding signed 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W with signed saturation, and
+///    store the packed 32-bit results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -261,22 +258,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 3
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
-// clang-format on
 #define _mm_dpwusds_epi32(__W, __A, __B)                                       \
   ((__m128i)__builtin_ia32_vpdpwusds128((__v4si)(__W), (__v8hu)(__A),          \
                                         (__v8hi)(__B)))
 
-// clang-format off
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding signed 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W with signed saturation, and store the packed
-///    32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding signed 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W with signed saturation, and
+///    store the packed 32-bit results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -298,21 +294,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 7
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
-// clang-format on
 #define _mm256_dpwusds_epi32(__W, __A, __B)                                    \
   ((__m256i)__builtin_ia32_vpdpwusds256((__v8si)(__W), (__v16hu)(__A),         \
                                         (__v16hi)(__B)))
 
-// clang-format off
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate unsigned 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W, and store the packed 32-bit
+///    results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -334,21 +330,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 3
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := __W.dword[j] + tmp1 + tmp2
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
-// clang-format on
 #define _mm_dpwuud_epi32(__W, __A, __B)                                        \
   ((__m128i)__builtin_ia32_vpdpwuud128((__v4si)(__W), (__v8hu)(__A),           \
                                        (__v8hu)(__B)))
 
-// clang-format off
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate unsigned 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W, and store the packed 32-bit
+///    results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -370,22 +366,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 7
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := __W.dword[j] + tmp1 + tmp2
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
-// clang-format on
 #define _mm256_dpwuud_epi32(__W, __A, __B)                                     \
   ((__m256i)__builtin_ia32_vpdpwuud256((__v8si)(__W), (__v16hu)(__A),          \
                                        (__v16hu)(__B)))
 
-// clang-format off
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W with signed saturation, and store the packed
-///    32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate unsigned 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W with unsigned saturation, and store
+///    the packed 32-bit results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -407,22 +402,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 3
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
-// clang-format on
 #define _mm_dpwuuds_epi32(__W, __A, __B)                                       \
   ((__m128i)__builtin_ia32_vpdpwuuds128((__v4si)(__W), (__v8hu)(__A),          \
                                         (__v8hu)(__B)))
 
-// clang-format off
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W with signed saturation, and store the packed
-///    32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate unsigned 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W with unsigned saturation, and store
+///    the packed 32-bit results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -444,12 +438,12 @@
 /// \code{.operation}
 /// FOR j := 0 to 7
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
-// clang-format on
 #define _mm256_dpwuuds_epi32(__W, __A, __B)                                    \
   ((__m256i)__builtin_ia32_vpdpwuuds256((__v8si)(__W), (__v16hu)(__A),         \
                                         (__v16hu)(__B)))