Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions clang/include/clang/Basic/BuiltinsX86_64.def
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,9 @@ TARGET_BUILTIN(__builtin_ia32_uwrmsr, "vULLiULLi", "n", "usermsr")
// AMX internal builtin
TARGET_BUILTIN(__builtin_ia32_tile_loadconfig_internal, "vvC*", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tileloadd64_internal, "V256iUsUsvC*z", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tileloaddrs64_internal, "V256iUsUsvC*z", "n", "amx-movrs")
TARGET_BUILTIN(__builtin_ia32_tileloaddt164_internal, "V256iUsUsvC*z", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tileloaddrst164_internal, "V256iUsUsvC*z", "n", "amx-movrs")
TARGET_BUILTIN(__builtin_ia32_tdpbssd_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-int8")
TARGET_BUILTIN(__builtin_ia32_tdpbsud_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-int8")
TARGET_BUILTIN(__builtin_ia32_tdpbusd_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-int8")
Expand All @@ -129,15 +131,27 @@ TARGET_BUILTIN(__builtin_ia32_tdpfp16ps_internal, "V256iUsUsUsV256iV256iV256i",
TARGET_BUILTIN(__builtin_ia32_tcmmimfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-complex")
TARGET_BUILTIN(__builtin_ia32_tcmmrlfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-complex")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rs_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rst1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rs_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rst1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
TARGET_BUILTIN(__builtin_ia32_ttransposed_internal, "V256iUsUsV256i", "n", "amx-transpose")

// AMX
TARGET_BUILTIN(__builtin_ia32_tile_loadconfig, "vvC*", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tile_storeconfig, "vvC*", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tilerelease, "v", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tilezero, "vUc", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rs, "vIUcvC*z", "n", "amx-movrs,amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rst1, "vIUcvC*z", "n", "amx-movrs,amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rs, "vIUcvC*z", "n", "amx-movrs,amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rst1, "vIUcvC*z", "n", "amx-movrs,amx-transpose")

TARGET_BUILTIN(__builtin_ia32_tileloaddrs64, "vIUcvC*z", "n", "amx-movrs")
TARGET_BUILTIN(__builtin_ia32_tileloaddrst164, "vIUcvC*z", "n", "amx-movrs")

TARGET_BUILTIN(__builtin_ia32_tileloadd64, "vIUcvC*z", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tileloaddt164, "vIUcvC*z", "n", "amx-tile")
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -6303,6 +6303,8 @@ def mamx_tile : Flag<["-"], "mamx-tile">, Group<m_x86_Features_Group>;
def mno_amx_tile : Flag<["-"], "mno-amx-tile">, Group<m_x86_Features_Group>;
def mamx_transpose : Flag<["-"], "mamx-transpose">, Group<m_x86_Features_Group>;
def mno_amx_transpose : Flag<["-"], "mno-amx-transpose">, Group<m_x86_Features_Group>;
def mamx_movrs: Flag<["-"], "mamx-movrs">, Group<m_x86_Features_Group>;
def mno_amx_movrs: Flag<["-"], "mno-amx-movrs">, Group<m_x86_Features_Group>;
def mcmpccxadd : Flag<["-"], "mcmpccxadd">, Group<m_x86_Features_Group>;
def mno_cmpccxadd : Flag<["-"], "mno-cmpccxadd">, Group<m_x86_Features_Group>;
def msse : Flag<["-"], "msse">, Group<m_x86_Features_Group>;
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Basic/Targets/X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAMXCOMPLEX = true;
} else if (Feature == "+amx-fp8") {
HasAMXFP8 = true;
} else if (Feature == "+amx-movrs") {
HasAMXMOVRS = true;
} else if (Feature == "+amx-transpose") {
HasAMXTRANSPOSE = true;
} else if (Feature == "+cmpccxadd") {
Expand Down Expand Up @@ -953,6 +955,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AMX_COMPLEX__");
if (HasAMXFP8)
Builder.defineMacro("__AMX_FP8__");
if (HasAMXMOVRS)
Builder.defineMacro("__AMX_MOVRS__");
if (HasAMXTRANSPOSE)
Builder.defineMacro("__AMX_TRANSPOSE__");
if (HasCMPCCXADD)
Expand Down Expand Up @@ -1085,6 +1089,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("amx-fp16", true)
.Case("amx-fp8", true)
.Case("amx-int8", true)
.Case("amx-movrs", true)
.Case("amx-tile", true)
.Case("amx-transpose", true)
.Case("avx", true)
Expand Down Expand Up @@ -1205,6 +1210,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("amx-fp16", HasAMXFP16)
.Case("amx-fp8", HasAMXFP8)
.Case("amx-int8", HasAMXINT8)
.Case("amx-movrs", HasAMXMOVRS)
.Case("amx-tile", HasAMXTILE)
.Case("amx-transpose", HasAMXTRANSPOSE)
.Case("avx", SSELevel >= AVX)
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Targets/X86.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAMXBF16 = false;
bool HasAMXCOMPLEX = false;
bool HasAMXFP8 = false;
bool HasAMXMOVRS = false;
bool HasAMXTRANSPOSE = false;
bool HasSERIALIZE = false;
bool HasTSXLDTRK = false;
Expand Down
18 changes: 17 additions & 1 deletion clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16996,25 +16996,41 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
// Corresponding to intrisics which will return 2 tiles (tile0_tile1).
case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal: {
case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
Intrinsic::ID IID;
switch (BuiltinID) {
default:
llvm_unreachable("Unsupported intrinsic!");
case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
IID = Intrinsic::x86_t2rpntlvwz0_internal;
break;
case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
break;
case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
break;
case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
break;
case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
IID = Intrinsic::x86_t2rpntlvwz1_internal;
break;
case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
break;
case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
break;
case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
break;
}

// Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Headers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ set(x86_files
amxfp8intrin.h
amxintrin.h
amxtransposeintrin.h
amxmovrsintrin.h
avx10_2_512bf16intrin.h
avx10_2_512convertintrin.h
avx10_2_512minmaxintrin.h
Expand Down
54 changes: 54 additions & 0 deletions clang/lib/Headers/amxmovrsintrin.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*===-------- amxmovrsintrin.h - AMX MOVRS intrinsics -*- C++ -*---------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
* ===-------------------------------------------------------------------=== */

#ifndef __IMMINTRIN_H
#error "Never use <amxmovrsintrin.h> directly; include <immintrin.h> instead."
#endif /* __IMMINTRIN_H */

#ifndef __AMXMOVRSINTRIN_H
#define __AMXMOVRSINTRIN_H
#ifdef __x86_64__

#define __DEFAULT_FN_ATTRS_MOVRS \
__attribute__((__always_inline__, __nodebug__, __target__("amx-movrs")))
#define _tile_movadvise_load __builtin_ia32_tmovadvise_load
#define _tile_movadvise_store __builtin_ia32_tmovadvise_store
#define _tile_2rpntlvwz0advise(tdst, base, stride, Imm) \
__builtin_ia32_t2rpntlvwz0advise(tdst, base, stride, Imm)
#define _tile_2rpntlvwz1advise(tdst, base, stride, Imm) \
__builtin_ia32_t2rpntlvwz1advise(tdst, base, stride, Imm)

#define _tile_loaddrs(dst, base, stride) \
__builtin_ia32_tileloaddrs64((dst), ((const void *)(base)), \
(__SIZE_TYPE__)(stride))
#define _tile_stream_loaddrs(dst, base, stride) \
__builtin_ia32_tileloaddrst164((dst), ((const void *)(base)), \
(__SIZE_TYPE__)(stride))
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_MOVRS
_tile_loaddrs_internal(unsigned short m, unsigned short n, const void *base,
__SIZE_TYPE__ stride) {
return __builtin_ia32_tileloaddrs64_internal(m, n, base,
(__SIZE_TYPE__)(stride));
}
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_MOVRS
_tile_loaddrst1_internal(unsigned short m, unsigned short n, const void *base,
__SIZE_TYPE__ stride) {
return __builtin_ia32_tileloaddrst164_internal(m, n, base,
(__SIZE_TYPE__)(stride));
}
static __inline__ void __DEFAULT_FN_ATTRS_MOVRS
__tile_loaddrs(__tile1024i *dst, const void *base, __SIZE_TYPE__ stride) {
dst->tile = _tile_loaddrs_internal(dst->row, dst->col, base, stride);
}
static __inline__ void __DEFAULT_FN_ATTRS_MOVRS __tile_stream_loaddrs(
__tile1024i *dst, const void *base, __SIZE_TYPE__ stride) {
dst->tile = _tile_loaddrst1_internal(dst->row, dst->col, base, stride);
}
#undef __DEFAULT_FN_ATTRS_MOVRS
#endif /* __x86_64__ */
#endif /* __AMXMOVRSINTRIN_H */
Loading
Loading