Skip to content

Commit fd570cb

Browse files
committed
1 parent 1fe8e78 commit fd570cb

File tree

24 files changed

+327
-0
lines changed

24 files changed

+327
-0
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,7 @@ X86 Support
642642

643643
- Supported intrinsics for ``MOVRS AND AVX10.2``.
644644
* Supported intrinsics of ``_mm(256|512)_(mask(z))_loadrs_epi(8|16|32|64)``.
645+
- Support ISA of ``AMX-FP8``.
645646

646647
Arm and AArch64 Support
647648
^^^^^^^^^^^^^^^^^^^^^^^

clang/include/clang/Basic/BuiltinsX86_64.def

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,12 @@ TARGET_BUILTIN(__builtin_ia32_cmpccxadd64, "SLLiv*SLLiSLLiIi", "n", "cmpccxadd")
155155
// AMX_FP16 FP16
156156
TARGET_BUILTIN(__builtin_ia32_tdpfp16ps, "vIUcIUcIUc", "n", "amx-fp16")
157157

158+
// AMX FP8
159+
TARGET_BUILTIN(__builtin_ia32_tdpbf8ps, "vIUcUIcUIc", "n", "amx-fp8")
160+
TARGET_BUILTIN(__builtin_ia32_tdpbhf8ps, "vIUcUIcUIc", "n", "amx-fp8")
161+
TARGET_BUILTIN(__builtin_ia32_tdphbf8ps, "vIUcUIcUIc", "n", "amx-fp8")
162+
TARGET_BUILTIN(__builtin_ia32_tdphf8ps, "vIUcUIcUIc", "n", "amx-fp8")
163+
158164
// RAO-INT
159165
TARGET_BUILTIN(__builtin_ia32_aadd64, "vv*SOi", "n", "raoint")
160166
TARGET_BUILTIN(__builtin_ia32_aand64, "vv*SOi", "n", "raoint")

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6290,6 +6290,8 @@ def mamx_fp16 : Flag<["-"], "mamx-fp16">, Group<m_x86_Features_Group>;
62906290
def mno_amx_fp16 : Flag<["-"], "mno-amx-fp16">, Group<m_x86_Features_Group>;
62916291
def mamx_int8 : Flag<["-"], "mamx-int8">, Group<m_x86_Features_Group>;
62926292
def mno_amx_int8 : Flag<["-"], "mno-amx-int8">, Group<m_x86_Features_Group>;
6293+
def mamx_fp8 : Flag<["-"], "mamx-fp8">, Group<m_x86_Features_Group>;
6294+
def mno_amx_fp8 : Flag<["-"], "mno-amx-fp8">, Group<m_x86_Features_Group>;
62936295
def mamx_tile : Flag<["-"], "mamx-tile">, Group<m_x86_Features_Group>;
62946296
def mno_amx_tile : Flag<["-"], "mno-amx-tile">, Group<m_x86_Features_Group>;
62956297
def mcmpccxadd : Flag<["-"], "mcmpccxadd">, Group<m_x86_Features_Group>;

clang/lib/Basic/Targets/X86.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
420420
HasAMXTILE = true;
421421
} else if (Feature == "+amx-complex") {
422422
HasAMXCOMPLEX = true;
423+
} else if (Feature == "+amx-fp8") {
424+
HasAMXFP8 = true;
423425
} else if (Feature == "+cmpccxadd") {
424426
HasCMPCCXADD = true;
425427
} else if (Feature == "+raoint") {
@@ -939,6 +941,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
939941
Builder.defineMacro("__AMX_FP16__");
940942
if (HasAMXCOMPLEX)
941943
Builder.defineMacro("__AMX_COMPLEX__");
944+
if (HasAMXFP8)
945+
Builder.defineMacro("__AMX_FP8__");
942946
if (HasCMPCCXADD)
943947
Builder.defineMacro("__CMPCCXADD__");
944948
if (HasRAOINT)
@@ -1069,6 +1073,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
10691073
.Case("amx-fp16", true)
10701074
.Case("amx-int8", true)
10711075
.Case("amx-tile", true)
1076+
.Case("amx-fp8", true)
10721077
.Case("avx", true)
10731078
.Case("avx10.1-256", true)
10741079
.Case("avx10.1-512", true)
@@ -1187,6 +1192,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
11871192
.Case("amx-fp16", HasAMXFP16)
11881193
.Case("amx-int8", HasAMXINT8)
11891194
.Case("amx-tile", HasAMXTILE)
1195+
.Case("amx-fp8", HasAMXFP8)
11901196
.Case("avx", SSELevel >= AVX)
11911197
.Case("avx10.1-256", HasAVX10_1)
11921198
.Case("avx10.1-512", HasAVX10_1_512)

clang/lib/Basic/Targets/X86.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
157157
bool HasAMXINT8 = false;
158158
bool HasAMXBF16 = false;
159159
bool HasAMXCOMPLEX = false;
160+
bool HasAMXFP8 = false;
160161
bool HasSERIALIZE = false;
161162
bool HasTSXLDTRK = false;
162163
bool HasUSERMSR = false;

clang/lib/Headers/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ set(x86_files
149149
amxcomplexintrin.h
150150
amxfp16intrin.h
151151
amxintrin.h
152+
amxfp8intrin.h
152153
avx10_2_512bf16intrin.h
153154
avx10_2_512convertintrin.h
154155
avx10_2_512minmaxintrin.h

clang/lib/Headers/amxfp8intrin.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*===---------- amxfp8intrin.h - AMX intrinsics -*- C++ -*------------===
2+
*
3+
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
* See https://llvm.org/LICENSE.txt for license information.
5+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
*
7+
*===------------------------------------------------------------------------===
8+
*/
9+
10+
#ifndef __IMMINTRIN_H
11+
#error "Never use <amxfp8intrin.h> directly; include <immintrin.h> instead."
12+
#endif /* __IMMINTRIN_H */
13+
14+
#ifndef __AMXFP8INTRIN_H
15+
#define __AMXFP8INTRIN_H
16+
#ifdef __x86_64__
17+
18+
#define _tile_dpbf8ps __builtin_ia32_tdpbf8ps
19+
#define _tile_dpbhf8ps __builtin_ia32_tdpbhf8ps
20+
#define _tile_dphbf8ps __builtin_ia32_tdphbf8ps
21+
#define _tile_dphf8ps __builtin_ia32_tdphf8ps
22+
23+
#endif /* __x86_64__ */
24+
#endif /* __AMXFP8INTRIN_H */

clang/lib/Headers/immintrin.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,10 @@ _storebe_i64(void * __P, long long __D) {
648648
#include <amxcomplexintrin.h>
649649
#endif
650650

651+
#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP8__)
652+
#include <amxfp8intrin.h>
653+
#endif
654+
651655
#if !defined(__SCE__) || __has_feature(modules) || \
652656
defined(__AVX512VP2INTERSECT__)
653657
#include <avx512vp2intersectintrin.h>

clang/lib/Sema/SemaX86.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,10 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) {
640640
case X86::BI__builtin_ia32_tdpfp16ps:
641641
case X86::BI__builtin_ia32_tcmmimfp16ps:
642642
case X86::BI__builtin_ia32_tcmmrlfp16ps:
643+
case X86::BI__builtin_ia32_tdpbf8ps:
644+
case X86::BI__builtin_ia32_tdpbhf8ps:
645+
case X86::BI__builtin_ia32_tdphbf8ps:
646+
case X86::BI__builtin_ia32_tdphf8ps:
643647
return CheckBuiltinTileRangeAndDuplicate(TheCall, {0, 1, 2});
644648
}
645649
}

clang/test/CodeGen/X86/amx_fp8.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-fp8 \
2+
// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s
3+
#include <immintrin.h>
4+
5+
void test_amx(void *data) {
6+
//CHECK-LABEL: @test_amx
7+
//CHECK: call void @llvm.x86.tdpbf8ps(i8 1, i8 2, i8 3)
8+
_tile_dpbf8ps(1, 2, 3);
9+
}
10+
11+
void test_amx2(void *data) {
12+
//CHECK-LABEL: @test_amx2
13+
//CHECK: call void @llvm.x86.tdpbhf8ps(i8 1, i8 2, i8 3)
14+
_tile_dpbhf8ps(1, 2, 3);
15+
}
16+
17+
void test_amx3(void *data) {
18+
//CHECK-LABEL: @test_amx3
19+
//CHECK: call void @llvm.x86.tdphbf8ps(i8 1, i8 2, i8 3)
20+
_tile_dphbf8ps(1, 2, 3);
21+
}
22+
23+
void test_amx4(void *data) {
24+
//CHECK-LABEL: @test_amx4
25+
//CHECK: call void @llvm.x86.tdphf8ps(i8 1, i8 2, i8 3)
26+
_tile_dphf8ps(1, 2, 3);
27+
}

0 commit comments

Comments
 (0)