Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,7 @@ X86 Support
* Supported intrinsics of ``_mm(256|512)_(mask(z))_loadrs_epi(8|16|32|64)``.
- Support ISA of ``AMX-FP8``.
- Support ISA of ``AMX-TRANSPOSE``.
- Support ISA of ``AMX-MOVRS``.
- Support ISA of ``AMX-AVX512``.
- Support ISA of ``AMX-TF32``.

Expand Down
14 changes: 14 additions & 0 deletions clang/include/clang/Basic/BuiltinsX86_64.def
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,9 @@ TARGET_BUILTIN(__builtin_ia32_uwrmsr, "vULLiULLi", "n", "usermsr")
// AMX internal builtin
TARGET_BUILTIN(__builtin_ia32_tile_loadconfig_internal, "vvC*", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tileloadd64_internal, "V256iUsUsvC*z", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tileloaddrs64_internal, "V256iUsUsvC*z", "n", "amx-movrs")
TARGET_BUILTIN(__builtin_ia32_tileloaddt164_internal, "V256iUsUsvC*z", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tileloaddrst164_internal, "V256iUsUsvC*z", "n", "amx-movrs")
TARGET_BUILTIN(__builtin_ia32_tdpbssd_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-int8")
TARGET_BUILTIN(__builtin_ia32_tdpbsud_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-int8")
TARGET_BUILTIN(__builtin_ia32_tdpbusd_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-int8")
Expand All @@ -129,10 +131,15 @@ TARGET_BUILTIN(__builtin_ia32_tdpfp16ps_internal, "V256iUsUsUsV256iV256iV256i",
TARGET_BUILTIN(__builtin_ia32_tcmmimfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-complex")
TARGET_BUILTIN(__builtin_ia32_tcmmrlfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-complex")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rs_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rst1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rs_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rst1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
TARGET_BUILTIN(__builtin_ia32_ttransposed_internal, "V256iUsUsV256i", "n", "amx-transpose")

TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps_internal, "V16fUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16h_internal, "V32yUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16l_internal, "V32yUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
Expand All @@ -147,6 +154,13 @@ TARGET_BUILTIN(__builtin_ia32_tile_loadconfig, "vvC*", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tile_storeconfig, "vvC*", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tilerelease, "v", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tilezero, "vUc", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rs, "vIUcvC*z", "n", "amx-movrs,amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rst1, "vIUcvC*z", "n", "amx-movrs,amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rs, "vIUcvC*z", "n", "amx-movrs,amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rst1, "vIUcvC*z", "n", "amx-movrs,amx-transpose")

TARGET_BUILTIN(__builtin_ia32_tileloaddrs64, "vIUcvC*z", "n", "amx-movrs")
TARGET_BUILTIN(__builtin_ia32_tileloaddrst164, "vIUcvC*z", "n", "amx-movrs")

TARGET_BUILTIN(__builtin_ia32_tileloadd64, "vIUcvC*z", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tileloaddt164, "vIUcvC*z", "n", "amx-tile")
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -6303,6 +6303,8 @@ def mamx_tile : Flag<["-"], "mamx-tile">, Group<m_x86_Features_Group>;
def mno_amx_tile : Flag<["-"], "mno-amx-tile">, Group<m_x86_Features_Group>;
def mamx_transpose : Flag<["-"], "mamx-transpose">, Group<m_x86_Features_Group>;
def mno_amx_transpose : Flag<["-"], "mno-amx-transpose">, Group<m_x86_Features_Group>;
def mamx_movrs: Flag<["-"], "mamx-movrs">, Group<m_x86_Features_Group>;
def mno_amx_movrs: Flag<["-"], "mno-amx-movrs">, Group<m_x86_Features_Group>;
def mcmpccxadd : Flag<["-"], "mcmpccxadd">, Group<m_x86_Features_Group>;
def mno_cmpccxadd : Flag<["-"], "mno-cmpccxadd">, Group<m_x86_Features_Group>;
def msse : Flag<["-"], "msse">, Group<m_x86_Features_Group>;
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Basic/Targets/X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAMXCOMPLEX = true;
} else if (Feature == "+amx-fp8") {
HasAMXFP8 = true;
} else if (Feature == "+amx-movrs") {
HasAMXMOVRS = true;
} else if (Feature == "+amx-transpose") {
HasAMXTRANSPOSE = true;
} else if (Feature == "+amx-avx512") {
Expand Down Expand Up @@ -957,6 +959,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AMX_COMPLEX__");
if (HasAMXFP8)
Builder.defineMacro("__AMX_FP8__");
if (HasAMXMOVRS)
Builder.defineMacro("__AMX_MOVRS__");
if (HasAMXTRANSPOSE)
Builder.defineMacro("__AMX_TRANSPOSE__");
if (HasAMXAVX512)
Expand Down Expand Up @@ -1094,6 +1098,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("amx-fp16", true)
.Case("amx-fp8", true)
.Case("amx-int8", true)
.Case("amx-movrs", true)
.Case("amx-tf32", true)
.Case("amx-tile", true)
.Case("amx-transpose", true)
Expand Down Expand Up @@ -1216,6 +1221,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("amx-fp16", HasAMXFP16)
.Case("amx-fp8", HasAMXFP8)
.Case("amx-int8", HasAMXINT8)
.Case("amx-movrs", HasAMXMOVRS)
.Case("amx-tf32", HasAMXTF32)
.Case("amx-tile", HasAMXTILE)
.Case("amx-transpose", HasAMXTRANSPOSE)
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Targets/X86.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAMXBF16 = false;
bool HasAMXCOMPLEX = false;
bool HasAMXFP8 = false;
bool HasAMXMOVRS = false;
bool HasAMXTRANSPOSE = false;
bool HasAMXAVX512 = false;
bool HasAMXTF32 = false;
Expand Down
18 changes: 17 additions & 1 deletion clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17025,25 +17025,41 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
// Corresponding to intrisics which will return 2 tiles (tile0_tile1).
case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal: {
case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
Intrinsic::ID IID;
switch (BuiltinID) {
default:
llvm_unreachable("Unsupported intrinsic!");
case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
IID = Intrinsic::x86_t2rpntlvwz0_internal;
break;
case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
break;
case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
break;
case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
break;
case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
IID = Intrinsic::x86_t2rpntlvwz1_internal;
break;
case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
break;
case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
break;
case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
break;
}

// Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Headers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ set(x86_files
amxfp16intrin.h
amxfp8intrin.h
amxintrin.h
amxmovrsintrin.h
amxmovrstransposeintrin.h
amxtf32intrin.h
amxtf32transposeintrin.h
amxtransposeintrin.h
Expand Down
48 changes: 48 additions & 0 deletions clang/lib/Headers/amxmovrsintrin.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*===-------- amxmovrsintrin.h - AMX MOVRS intrinsics -*- C++ -*---------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
* ===-------------------------------------------------------------------=== */

#ifndef __IMMINTRIN_H
#error "Never use <amxmovrsintrin.h> directly; include <immintrin.h> instead."
#endif /* __IMMINTRIN_H */

#ifndef __AMXMOVRSINTRIN_H
#define __AMXMOVRSINTRIN_H
#ifdef __x86_64__

#define __DEFAULT_FN_ATTRS_MOVRS \
__attribute__((__always_inline__, __nodebug__, __target__("amx-movrs")))

#define _tile_loaddrs(dst, base, stride) \
__builtin_ia32_tileloaddrs64((dst), ((const void *)(base)), \
(__SIZE_TYPE__)(stride))
#define _tile_stream_loaddrs(dst, base, stride) \
__builtin_ia32_tileloaddrst164((dst), ((const void *)(base)), \
(__SIZE_TYPE__)(stride))
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_MOVRS
_tile_loaddrs_internal(unsigned short m, unsigned short n, const void *base,
__SIZE_TYPE__ stride) {
return __builtin_ia32_tileloaddrs64_internal(m, n, base,
(__SIZE_TYPE__)(stride));
}
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_MOVRS
_tile_loaddrst1_internal(unsigned short m, unsigned short n, const void *base,
__SIZE_TYPE__ stride) {
return __builtin_ia32_tileloaddrst164_internal(m, n, base,
(__SIZE_TYPE__)(stride));
}
static __inline__ void __DEFAULT_FN_ATTRS_MOVRS
__tile_loaddrs(__tile1024i *dst, const void *base, __SIZE_TYPE__ stride) {
dst->tile = _tile_loaddrs_internal(dst->row, dst->col, base, stride);
}
static __inline__ void __DEFAULT_FN_ATTRS_MOVRS __tile_stream_loaddrs(
__tile1024i *dst, const void *base, __SIZE_TYPE__ stride) {
dst->tile = _tile_loaddrst1_internal(dst->row, dst->col, base, stride);
}
#undef __DEFAULT_FN_ATTRS_MOVRS
#endif /* __x86_64__ */
#endif /* __AMXMOVRSINTRIN_H */
Loading
Loading