Skip to content

Commit a8a0040

Browse files
committed
Address review comments
1 parent 96146a0 commit a8a0040

File tree

4 files changed

+16
-10
lines changed

4 files changed

+16
-10
lines changed

clang/lib/Headers/amxbf16transposeintrin.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
/// The 1st source tile. Max size is 1024 Bytes.
5858
/// \param b
5959
/// The 2nd source tile. Max size is 1024 Bytes.
60-
#define _tile_tdpbf16ps(dst, a, b) __builtin_ia32_ttdpbf16ps(dst, a, b)
60+
#define _tile_tdpbf16ps(dst, a, b) __builtin_ia32_ttdpbf16ps((dst), (a), (b))
6161

6262
/// This is internal intrinsic. C/C++ user should avoid calling it directly.
6363
static __inline__ _tile1024i __DEFAULT_FN_ATTRS

clang/lib/Headers/amxcomplextransposeintrin.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@
6363
/// The 1st source tile. Max size is 1024 Bytes.
6464
/// \param b
6565
/// The 2nd source tile. Max size is 1024 Bytes.
66-
#define _tile_tcmmimfp16ps(dst, a, b) __builtin_ia32_ttcmmimfp16ps(dst, a, b)
66+
#define _tile_tcmmimfp16ps(dst, a, b) \
67+
__builtin_ia32_ttcmmimfp16ps((dst), (a), (b))
6768

6869
/// Perform matrix multiplication of two tiles containing complex elements and
6970
/// accumulate the results into a packed single precision tile. Each dword
@@ -108,7 +109,8 @@
108109
/// The 1st source tile. Max size is 1024 Bytes.
109110
/// \param b
110111
/// The 2nd source tile. Max size is 1024 Bytes.
111-
#define _tile_tcmmrlfp16ps(dst, a, b) __builtin_ia32_ttcmmrlfp16ps(dst, a, b)
112+
#define _tile_tcmmrlfp16ps(dst, a, b) \
113+
__builtin_ia32_ttcmmrlfp16ps((dst), (a), (b))
112114

113115
/// Perform matrix conjugate transpose and multiplication of two tiles
114116
/// containing complex elements and accumulate the results into a packed
@@ -155,7 +157,7 @@
155157
/// \param b
156158
/// The 2nd source tile. Max size is 1024 Bytes.
157159
#define _tile_conjtcmmimfp16ps(dst, a, b) \
158-
__builtin_ia32_tconjtcmmimfp16ps(dst, a, b)
160+
__builtin_ia32_tconjtcmmimfp16ps((dst), (a), (b))
159161

160162
/// Perform conjugate transpose of an FP16-pair of complex elements from \a a
161163
/// and writes the result to \a dst.
@@ -184,7 +186,7 @@
184186
/// The destination tile. Max size is 1024 Bytes.
185187
/// \param a
186188
/// The source tile. Max size is 1024 Bytes.
187-
#define _tile_conjtfp16(dst, a) __builtin_ia32_tconjtfp16(dst, a)
189+
#define _tile_conjtfp16(dst, a) __builtin_ia32_tconjtfp16((dst), (a))
188190

189191
static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmimfp16ps_internal(
190192
unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
@@ -204,8 +206,8 @@ static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_conjtcmmimfp16ps_internal(
204206
return __builtin_ia32_tconjtcmmimfp16ps_internal(m, n, k, dst, src1, src2);
205207
}
206208

207-
static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_conjtfp16_internal(
208-
unsigned short m, unsigned short n, _tile1024i src) {
209+
static __inline__ _tile1024i __DEFAULT_FN_ATTRS
210+
_tile_conjtfp16_internal(unsigned short m, unsigned short n, _tile1024i src) {
209211
return __builtin_ia32_tconjtfp16_internal(m, n, src);
210212
}
211213

clang/lib/Headers/amxfp16transposeintrin.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
/// The 1st source tile. Max size is 1024 Bytes.
5858
/// \param b
5959
/// The 2nd source tile. Max size is 1024 Bytes.
60-
#define _tile_tdpfp16ps(dst, a, b) __builtin_ia32_ttdpfp16ps(dst, a, b)
60+
#define _tile_tdpfp16ps(dst, a, b) __builtin_ia32_ttdpfp16ps((dst), (a), (b))
6161

6262
/// This is internal intrinsic. C/C++ user should avoid calling it directly.
6363
static __inline__ _tile1024i __DEFAULT_FN_ATTRS

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37549,8 +37549,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3754937549
case X86::PTCONJTCMMIMFP16PS:
3755037550
Opc = X86::TCONJTCMMIMFP16PS;
3755137551
break;
37552-
case X86::PTMMULTF32PS: Opc = X86::TMMULTF32PS; break;
37553-
case X86::PTTMMULTF32PS: Opc = X86::TTMMULTF32PS; break;
37552+
case X86::PTMMULTF32PS:
37553+
Opc = X86::TMMULTF32PS;
37554+
break;
37555+
case X86::PTTMMULTF32PS:
37556+
Opc = X86::TTMMULTF32PS;
37557+
break;
3755437558
}
3755537559

3755637560
MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));

0 commit comments

Comments
 (0)