Skip to content

Commit b43c97c

Browse files
authored
[Headers][X86] amxintrin.h - fix attributes according to Intel SDM (#122204)
`tileloadd`, `tileloaddt1` and `tilestored` are part of `amx-tile` feature. The problem is observed if `__tile_loadd` intrinsic is invoked, `_tile_loadd_internal` requiring `amx-int8` is inlined into `__tile_loadd` that has only `amx-tile`.
1 parent 3c9c94a commit b43c97c

File tree

3 files changed

+40
-33
lines changed

3 files changed

+40
-33
lines changed

clang/lib/Headers/amxintrin.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -234,15 +234,15 @@ typedef int _tile1024i_1024a
234234
__attribute__((__vector_size__(1024), __aligned__(1024)));
235235

236236
/// This is internal intrinsic. C/C++ user should avoid calling it directly.
237-
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8
237+
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE
238238
_tile_loadd_internal(unsigned short m, unsigned short n, const void *base,
239239
__SIZE_TYPE__ stride) {
240240
return __builtin_ia32_tileloadd64_internal(m, n, base,
241241
(__SIZE_TYPE__)(stride));
242242
}
243243

244244
/// This is internal intrinsic. C/C++ user should avoid calling it directly.
245-
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8
245+
static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE
246246
_tile_loaddt1_internal(unsigned short m, unsigned short n, const void *base,
247247
__SIZE_TYPE__ stride) {
248248
return __builtin_ia32_tileloaddt164_internal(m, n, base,
@@ -278,7 +278,7 @@ _tile_dpbuud_internal(unsigned short m, unsigned short n, unsigned short k,
278278
}
279279

280280
/// This is internal intrinsic. C/C++ user should avoid calling it directly.
281-
static __inline__ void __DEFAULT_FN_ATTRS_INT8
281+
static __inline__ void __DEFAULT_FN_ATTRS_TILE
282282
_tile_stored_internal(unsigned short m, unsigned short n, void *base,
283283
__SIZE_TYPE__ stride, _tile1024i tile) {
284284
return __builtin_ia32_tilestored64_internal(m, n, base,

clang/test/CodeGen/X86/amx_api.c

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -33,22 +33,6 @@ void test_api(int cond, short row, short col) {
3333
__tile_stored(buf, STRIDE, c);
3434
}
3535

36-
void test_tile_loadd(short row, short col) {
37-
//CHECK-LABEL: @test_tile_loadd
38-
//CHECK-DAG: call x86_amx @llvm.x86.tileloadd64.internal
39-
//CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
40-
__tile1024i a = {row, col};
41-
__tile_loadd(&a, buf, STRIDE);
42-
}
43-
44-
void test_tile_stream_loadd(short row, short col) {
45-
//CHECK-LABEL: @test_tile_stream_loadd
46-
//CHECK-DAG: call x86_amx @llvm.x86.tileloaddt164.internal
47-
//CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
48-
__tile1024i a = {row, col};
49-
__tile_stream_loadd(&a, buf, STRIDE);
50-
}
51-
5236
void test_tile_dpbssd(__tile1024i a, __tile1024i b, __tile1024i c) {
5337
//CHECK-LABEL: @test_tile_dpbssd
5438
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
@@ -81,20 +65,6 @@ void test_tile_dpbuud(__tile1024i a, __tile1024i b, __tile1024i c) {
8165
__tile_dpbuud(&c, a, b);
8266
}
8367

84-
void test_tile_stored(__tile1024i c) {
85-
//CHECK-LABEL: @test_tile_stored
86-
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
87-
//CHECK-DAG: call void @llvm.x86.tilestored64.internal
88-
__tile_stored(buf, STRIDE, c);
89-
}
90-
91-
void test_tile_zero(__tile1024i c) {
92-
//CHECK-LABEL: @test_tile_zero
93-
//CHECK-DAG: call x86_amx @llvm.x86.tilezero.internal
94-
//CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
95-
__tile_zero(&c);
96-
}
97-
9868
void test_tile_dpbf16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
9969
//CHECK-LABEL: @test_tile_dpbf16ps
10070
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})

clang/test/CodeGen/X86/amx_tile.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile \
2+
// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK
3+
4+
#include <immintrin.h>
5+
6+
char buf[1024];
7+
#define STRIDE 32
8+
9+
void test_tile_loadd(short row, short col) {
10+
//CHECK-LABEL: @test_tile_loadd
11+
//CHECK-DAG: call x86_amx @llvm.x86.tileloadd64.internal
12+
//CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
13+
__tile1024i a = {row, col};
14+
__tile_loadd(&a, buf, STRIDE);
15+
}
16+
17+
void test_tile_stream_loadd(short row, short col) {
18+
//CHECK-LABEL: @test_tile_stream_loadd
19+
//CHECK-DAG: call x86_amx @llvm.x86.tileloaddt164.internal
20+
//CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
21+
__tile1024i a = {row, col};
22+
__tile_stream_loadd(&a, buf, STRIDE);
23+
}
24+
25+
void test_tile_stored(__tile1024i c) {
26+
//CHECK-LABEL: @test_tile_stored
27+
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
28+
//CHECK-DAG: call void @llvm.x86.tilestored64.internal
29+
__tile_stored(buf, STRIDE, c);
30+
}
31+
32+
void test_tile_zero(__tile1024i c) {
33+
//CHECK-LABEL: @test_tile_zero
34+
//CHECK-DAG: call x86_amx @llvm.x86.tilezero.internal
35+
//CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
36+
__tile_zero(&c);
37+
}

0 commit comments

Comments
 (0)