Skip to content

Commit b52aae1

Browse files
committed
[X86][AMX] Support AMX-MOVRS
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368
1 parent 390300d commit b52aae1

31 files changed

+1377
-6
lines changed

clang/include/clang/Basic/BuiltinsX86_64.def

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,9 @@ TARGET_BUILTIN(__builtin_ia32_uwrmsr, "vULLiULLi", "n", "usermsr")
117117
// AMX internal builtin
118118
TARGET_BUILTIN(__builtin_ia32_tile_loadconfig_internal, "vvC*", "n", "amx-tile")
119119
TARGET_BUILTIN(__builtin_ia32_tileloadd64_internal, "V256iUsUsvC*z", "n", "amx-tile")
120+
TARGET_BUILTIN(__builtin_ia32_tileloaddrs64_internal, "V256iUsUsvC*z", "n", "amx-movrs")
120121
TARGET_BUILTIN(__builtin_ia32_tileloaddt164_internal, "V256iUsUsvC*z", "n", "amx-tile")
122+
TARGET_BUILTIN(__builtin_ia32_tileloaddrst164_internal, "V256iUsUsvC*z", "n", "amx-movrs")
121123
TARGET_BUILTIN(__builtin_ia32_tdpbssd_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-int8")
122124
TARGET_BUILTIN(__builtin_ia32_tdpbsud_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-int8")
123125
TARGET_BUILTIN(__builtin_ia32_tdpbusd_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-int8")
@@ -129,15 +131,27 @@ TARGET_BUILTIN(__builtin_ia32_tdpfp16ps_internal, "V256iUsUsUsV256iV256iV256i",
129131
TARGET_BUILTIN(__builtin_ia32_tcmmimfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-complex")
130132
TARGET_BUILTIN(__builtin_ia32_tcmmrlfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-complex")
131133
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
134+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rs_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
132135
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
136+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rst1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
133137
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
138+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rs_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
134139
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
140+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rst1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
135141
TARGET_BUILTIN(__builtin_ia32_ttransposed_internal, "V256iUsUsV256i", "n", "amx-transpose")
142+
136143
// AMX
137144
TARGET_BUILTIN(__builtin_ia32_tile_loadconfig, "vvC*", "n", "amx-tile")
138145
TARGET_BUILTIN(__builtin_ia32_tile_storeconfig, "vvC*", "n", "amx-tile")
139146
TARGET_BUILTIN(__builtin_ia32_tilerelease, "v", "n", "amx-tile")
140147
TARGET_BUILTIN(__builtin_ia32_tilezero, "vUc", "n", "amx-tile")
148+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rs, "vIUcvC*z", "n", "amx-movrs,amx-transpose")
149+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rst1, "vIUcvC*z", "n", "amx-movrs,amx-transpose")
150+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rs, "vIUcvC*z", "n", "amx-movrs,amx-transpose")
151+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rst1, "vIUcvC*z", "n", "amx-movrs,amx-transpose")
152+
153+
TARGET_BUILTIN(__builtin_ia32_tileloaddrs64, "vIUcvC*z", "n", "amx-movrs")
154+
TARGET_BUILTIN(__builtin_ia32_tileloaddrst164, "vIUcvC*z", "n", "amx-movrs")
141155

142156
TARGET_BUILTIN(__builtin_ia32_tileloadd64, "vIUcvC*z", "n", "amx-tile")
143157
TARGET_BUILTIN(__builtin_ia32_tileloaddt164, "vIUcvC*z", "n", "amx-tile")

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6303,6 +6303,8 @@ def mamx_tile : Flag<["-"], "mamx-tile">, Group<m_x86_Features_Group>;
63036303
def mno_amx_tile : Flag<["-"], "mno-amx-tile">, Group<m_x86_Features_Group>;
63046304
def mamx_transpose : Flag<["-"], "mamx-transpose">, Group<m_x86_Features_Group>;
63056305
def mno_amx_transpose : Flag<["-"], "mno-amx-transpose">, Group<m_x86_Features_Group>;
6306+
def mamx_movrs: Flag<["-"], "mamx-movrs">, Group<m_x86_Features_Group>;
6307+
def mno_amx_movrs: Flag<["-"], "mno-amx-movrs">, Group<m_x86_Features_Group>;
63066308
def mcmpccxadd : Flag<["-"], "mcmpccxadd">, Group<m_x86_Features_Group>;
63076309
def mno_cmpccxadd : Flag<["-"], "mno-cmpccxadd">, Group<m_x86_Features_Group>;
63086310
def msse : Flag<["-"], "msse">, Group<m_x86_Features_Group>;

clang/lib/Basic/Targets/X86.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
430430
HasAMXCOMPLEX = true;
431431
} else if (Feature == "+amx-fp8") {
432432
HasAMXFP8 = true;
433+
} else if (Feature == "+amx-movrs") {
434+
HasAMXMOVRS = true;
433435
} else if (Feature == "+amx-transpose") {
434436
HasAMXTRANSPOSE = true;
435437
} else if (Feature == "+cmpccxadd") {
@@ -953,6 +955,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
953955
Builder.defineMacro("__AMX_COMPLEX__");
954956
if (HasAMXFP8)
955957
Builder.defineMacro("__AMX_FP8__");
958+
if (HasAMXMOVRS)
959+
Builder.defineMacro("__AMX_MOVRS__");
956960
if (HasAMXTRANSPOSE)
957961
Builder.defineMacro("__AMX_TRANSPOSE__");
958962
if (HasCMPCCXADD)
@@ -1085,6 +1089,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
10851089
.Case("amx-fp16", true)
10861090
.Case("amx-fp8", true)
10871091
.Case("amx-int8", true)
1092+
.Case("amx-movrs", true)
10881093
.Case("amx-tile", true)
10891094
.Case("amx-transpose", true)
10901095
.Case("avx", true)
@@ -1205,6 +1210,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
12051210
.Case("amx-fp16", HasAMXFP16)
12061211
.Case("amx-fp8", HasAMXFP8)
12071212
.Case("amx-int8", HasAMXINT8)
1213+
.Case("amx-movrs", HasAMXMOVRS)
12081214
.Case("amx-tile", HasAMXTILE)
12091215
.Case("amx-transpose", HasAMXTRANSPOSE)
12101216
.Case("avx", SSELevel >= AVX)

clang/lib/Basic/Targets/X86.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
158158
bool HasAMXBF16 = false;
159159
bool HasAMXCOMPLEX = false;
160160
bool HasAMXFP8 = false;
161+
bool HasAMXMOVRS = false;
161162
bool HasAMXTRANSPOSE = false;
162163
bool HasSERIALIZE = false;
163164
bool HasTSXLDTRK = false;

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16996,25 +16996,41 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1699616996
}
1699716997
// Corresponding to intrisics which will return 2 tiles (tile0_tile1).
1699816998
case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
16999+
case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
1699917000
case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17001+
case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
1700017002
case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17001-
case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal: {
17003+
case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17004+
case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17005+
case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
1700217006
Intrinsic::ID IID;
1700317007
switch (BuiltinID) {
1700417008
default:
1700517009
llvm_unreachable("Unsupported intrinsic!");
1700617010
case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
1700717011
IID = Intrinsic::x86_t2rpntlvwz0_internal;
1700817012
break;
17013+
case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17014+
IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
17015+
break;
1700917016
case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
1701017017
IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
1701117018
break;
17019+
case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17020+
IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
17021+
break;
1701217022
case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
1701317023
IID = Intrinsic::x86_t2rpntlvwz1_internal;
1701417024
break;
17025+
case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17026+
IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
17027+
break;
1701517028
case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
1701617029
IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
1701717030
break;
17031+
case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
17032+
IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
17033+
break;
1701817034
}
1701917035

1702017036
// Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)

clang/lib/Headers/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ set(x86_files
151151
amxfp8intrin.h
152152
amxintrin.h
153153
amxtransposeintrin.h
154+
amxmovrsintrin.h
154155
avx10_2_512bf16intrin.h
155156
avx10_2_512convertintrin.h
156157
avx10_2_512minmaxintrin.h

clang/lib/Headers/amxmovrsintrin.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*===-------- amxmovrsintrin.h - AMX MOVRS intrinsics -*- C++ -*---------===
2+
*
3+
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
* See https://llvm.org/LICENSE.txt for license information.
5+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
*
7+
* ===-------------------------------------------------------------------=== */
8+
9+
#ifndef __IMMINTRIN_H
10+
#error "Never use <amxmovrsintrin.h> directly; include <immintrin.h> instead."
11+
#endif /* __IMMINTRIN_H */
12+
13+
#ifndef __AMXMOVRSINTRIN_H
14+
#define __AMXMOVRSINTRIN_H
15+
#ifdef __x86_64__
16+
17+
#define __DEFAULT_FN_ATTRS_MOVRS \
18+
__attribute__((__always_inline__, __nodebug__, __target__("amx-movrs")))
19+
#define _tile_movadvise_load __builtin_ia32_tmovadvise_load
20+
#define _tile_movadvise_store __builtin_ia32_tmovadvise_store
21+
#define _tile_2rpntlvwz0advise(tdst, base, stride, Imm) \
22+
__builtin_ia32_t2rpntlvwz0advise(tdst, base, stride, Imm)
23+
#define _tile_2rpntlvwz1advise(tdst, base, stride, Imm) \
24+
__builtin_ia32_t2rpntlvwz1advise(tdst, base, stride, Imm)
25+
26+
#define _tile_loaddrs(dst, base, stride) \
27+
__builtin_ia32_tileloaddrs64((dst), ((const void *)(base)), \
28+
(__SIZE_TYPE__)(stride))
29+
#define _tile_stream_loaddrs(dst, base, stride) \
30+
__builtin_ia32_tileloaddrst164((dst), ((const void *)(base)), \
31+
(__SIZE_TYPE__)(stride))
32+
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_MOVRS
33+
_tile_loaddrs_internal(unsigned short m, unsigned short n, const void *base,
34+
__SIZE_TYPE__ stride) {
35+
return __builtin_ia32_tileloaddrs64_internal(m, n, base,
36+
(__SIZE_TYPE__)(stride));
37+
}
38+
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_MOVRS
39+
_tile_loaddrst1_internal(unsigned short m, unsigned short n, const void *base,
40+
__SIZE_TYPE__ stride) {
41+
return __builtin_ia32_tileloaddrst164_internal(m, n, base,
42+
(__SIZE_TYPE__)(stride));
43+
}
44+
static __inline__ void __DEFAULT_FN_ATTRS_MOVRS
45+
__tile_loaddrs(__tile1024i *dst, const void *base, __SIZE_TYPE__ stride) {
46+
dst->tile = _tile_loaddrs_internal(dst->row, dst->col, base, stride);
47+
}
48+
static __inline__ void __DEFAULT_FN_ATTRS_MOVRS __tile_stream_loaddrs(
49+
__tile1024i *dst, const void *base, __SIZE_TYPE__ stride) {
50+
dst->tile = _tile_loaddrst1_internal(dst->row, dst->col, base, stride);
51+
}
52+
#undef __DEFAULT_FN_ATTRS_MOVRS
53+
#endif /* __x86_64__ */
54+
#endif /* __AMXMOVRSINTRIN_H */

0 commit comments

Comments
 (0)