Skip to content

Commit 8f6c7aa

Browse files
[X86] Remove vector length (256 vs 512) distinction of AVX10 (#167736)
As in title. AVX10.x doesn't distinguish between available vector lengths. -mattr=avx10.x-512 and the _512 macro definitions are kept for compatibility. Bit positions of the avx10.1/2 features in compiler-rt and X86TargetParser are synced to match those in GCC.
1 parent 20db716 commit 8f6c7aa

File tree

6 files changed

+110
-26
lines changed

6 files changed

+110
-26
lines changed

clang/lib/Headers/cpuid.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,6 @@
253253
#define bit_RDPRU 0x00000010
254254
#define bit_WBNOINVD 0x00000200
255255

256-
/* Features in %ebx for leaf 0x24 */
257-
#define bit_AVX10_256 0x00020000
258-
#define bit_AVX10_512 0x00040000
259-
260256
#ifdef __i386__
261257
#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \
262258
__asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \

clang/test/CodeGen/attr-target-x86.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,15 @@ __attribute__((target("fpmath=387")))
3333
void f_fpmath_387(void) {}
3434

3535
// CHECK-NOT: tune-cpu
36-
// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.1-512,-avx10.2,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
36+
// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
3737
__attribute__((target("no-sse2")))
3838
void f_no_sse2(void) {}
3939

4040
// CHECK: [[f_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
4141
__attribute__((target("sse4")))
4242
void f_sse4(void) {}
4343

44-
// CHECK: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1,-avx10.1-512,-avx10.2,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
44+
// CHECK: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
4545
__attribute__((target("no-sse4")))
4646
void f_no_sse4(void) {}
4747

compiler-rt/lib/builtins/cpu_model/x86.c

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -229,10 +229,8 @@ enum ProcessorFeatures {
229229
FEATURE_SM4,
230230
FEATURE_APXF,
231231
FEATURE_USERMSR,
232-
FEATURE_AVX10_1_256,
233-
FEATURE_AVX10_1_512,
234-
FEATURE_AVX10_2_256,
235-
FEATURE_AVX10_2_512,
232+
FEATURE_AVX10_1 = 114,
233+
FEATURE_AVX10_2 = 116,
236234
FEATURE_MOVRS,
237235
CPU_FEATURE_MAX
238236
};
@@ -1093,18 +1091,11 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
10931091
bool HasLeaf24 =
10941092
MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
10951093
if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24) {
1096-
bool Has512Len = (EBX >> 18) & 1;
10971094
int AVX10Ver = EBX & 0xff;
1098-
if (AVX10Ver >= 2) {
1099-
setFeature(FEATURE_AVX10_2_256);
1100-
if (Has512Len)
1101-
setFeature(FEATURE_AVX10_2_512);
1102-
}
1103-
if (AVX10Ver >= 1) {
1104-
setFeature(FEATURE_AVX10_1_256);
1105-
if (Has512Len)
1106-
setFeature(FEATURE_AVX10_1_512);
1107-
}
1095+
if (AVX10Ver >= 1)
1096+
setFeature(FEATURE_AVX10_1);
1097+
if (AVX10Ver >= 2)
1098+
setFeature(FEATURE_AVX10_2);
11081099
}
11091100

11101101
unsigned MaxExtLevel = 0;

llvm/include/llvm/TargetParser/X86TargetParser.def

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -261,9 +261,9 @@ X86_FEATURE_COMPAT(SM4, "sm4", 0)
261261
X86_FEATURE (EGPR, "egpr")
262262
X86_FEATURE_COMPAT(USERMSR, "usermsr", 0)
263263
X86_FEATURE_COMPAT(AVX10_1, "avx10.1", 36)
264-
X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 37)
265-
X86_FEATURE_COMPAT(AVX10_2, "avx10.2", 0)
266-
X86_FEATURE_COMPAT(AVX10_2_512, "avx10.2-512", 0)
264+
X86_FEATURE (DUMMYFEATURE3, "__dummyfeature3")
265+
X86_FEATURE_COMPAT(AVX10_2, "avx10.2", 37)
266+
X86_FEATURE (DUMMYFEATURE4, "__dummyfeature4")
267267
// FIXME: Define MOVRS as _COMPAT once GCC lands the related patch.
268268
X86_FEATURE (MOVRS, "movrs")
269269
X86_FEATURE (ZU, "zu")

llvm/lib/TargetParser/X86TargetParser.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,8 @@ constexpr FeatureBitset ImpliedFeaturesX87 = {};
544544
constexpr FeatureBitset ImpliedFeaturesXSAVE = {};
545545
constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE1 = {};
546546
constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE2 = {};
547+
constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE3 = {};
548+
constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE4 = {};
547549

548550
// Not really CPU features, but need to be in the table because clang uses
549551
// target features to communicate them to the backend.
@@ -644,8 +646,6 @@ constexpr FeatureBitset ImpliedFeaturesAVX10_1 =
644646
FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureAVX512FP16 |
645647
FeatureAVX512DQ | FeatureAVX512VL;
646648
constexpr FeatureBitset ImpliedFeaturesAVX10_2 = FeatureAVX10_1;
647-
constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 = FeatureAVX10_1;
648-
constexpr FeatureBitset ImpliedFeaturesAVX10_2_512 = FeatureAVX10_2;
649649

650650
// APX Features
651651
constexpr FeatureBitset ImpliedFeaturesEGPR = {};
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
3+
; avx10.x-512 is just avx10.x -- 512 is kept for compatibility purposes.
4+
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-512 2>&1 | FileCheck --check-prefixes=CHECK-AVX10_1 %s
6+
7+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 2>&1 | FileCheck --check-prefixes=CHECK-AVX10_2 %s
8+
9+
; CHECK-AVX10_1-NOT: is not recognizable
10+
; CHECK-AVX10_2-NOT: is not recognizable
11+
12+
define <32 x bfloat> @foo_avx10.1(<16 x float> %a, <16 x float> %b) {
13+
; CHECK-AVX10_1-LABEL: foo_avx10.1:
14+
; CHECK-AVX10_1: # %bb.0:
15+
; CHECK-AVX10_1-NEXT: vcvtne2ps2bf16 %zmm1, %zmm0, %zmm0
16+
; CHECK-AVX10_1-NEXT: retq
17+
;
18+
; CHECK-AVX10_2-LABEL: foo_avx10.1:
19+
; CHECK-AVX10_2: # %bb.0:
20+
; CHECK-AVX10_2-NEXT: vcvtne2ps2bf16 %zmm1, %zmm0, %zmm0
21+
; CHECK-AVX10_2-NEXT: retq
22+
%ret = call <32 x bfloat> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %a, <16 x float> %b)
23+
ret <32 x bfloat> %ret
24+
}
25+
26+
define <8 x i32> @foo_avx10.2(<8 x double> %f) {
27+
; CHECK-AVX10_1-LABEL: foo_avx10.2:
28+
; CHECK-AVX10_1: # %bb.0:
29+
; CHECK-AVX10_1-NEXT: vextractf32x4 $2, %zmm0, %xmm1
30+
; CHECK-AVX10_1-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0]
31+
; CHECK-AVX10_1-NEXT: vmovsd {{.*#+}} xmm3 = [-2.147483648E+9,0.0E+0]
32+
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm2, %xmm4
33+
; CHECK-AVX10_1-NEXT: vmovsd {{.*#+}} xmm5 = [2.147483647E+9,0.0E+0]
34+
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm4, %xmm4
35+
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm4, %ecx
36+
; CHECK-AVX10_1-NEXT: xorl %eax, %eax
37+
; CHECK-AVX10_1-NEXT: vucomisd %xmm2, %xmm2
38+
; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx
39+
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm1, %xmm2
40+
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm2, %xmm2
41+
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm2, %edx
42+
; CHECK-AVX10_1-NEXT: vucomisd %xmm1, %xmm1
43+
; CHECK-AVX10_1-NEXT: cmovpl %eax, %edx
44+
; CHECK-AVX10_1-NEXT: vmovd %edx, %xmm1
45+
; CHECK-AVX10_1-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
46+
; CHECK-AVX10_1-NEXT: vextractf32x4 $3, %zmm0, %xmm2
47+
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm2, %xmm4
48+
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm4, %xmm4
49+
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm4, %ecx
50+
; CHECK-AVX10_1-NEXT: vucomisd %xmm2, %xmm2
51+
; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx
52+
; CHECK-AVX10_1-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
53+
; CHECK-AVX10_1-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
54+
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm2, %xmm4
55+
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm4, %xmm4
56+
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm4, %ecx
57+
; CHECK-AVX10_1-NEXT: vucomisd %xmm2, %xmm2
58+
; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx
59+
; CHECK-AVX10_1-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm1
60+
; CHECK-AVX10_1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
61+
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm2, %xmm4
62+
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm4, %xmm4
63+
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm4, %ecx
64+
; CHECK-AVX10_1-NEXT: vucomisd %xmm2, %xmm2
65+
; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx
66+
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm0, %xmm2
67+
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm2, %xmm2
68+
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm2, %edx
69+
; CHECK-AVX10_1-NEXT: vucomisd %xmm0, %xmm0
70+
; CHECK-AVX10_1-NEXT: cmovpl %eax, %edx
71+
; CHECK-AVX10_1-NEXT: vmovd %edx, %xmm2
72+
; CHECK-AVX10_1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
73+
; CHECK-AVX10_1-NEXT: vextractf128 $1, %ymm0, %xmm0
74+
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm0, %xmm4
75+
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm4, %xmm4
76+
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm4, %ecx
77+
; CHECK-AVX10_1-NEXT: vucomisd %xmm0, %xmm0
78+
; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx
79+
; CHECK-AVX10_1-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
80+
; CHECK-AVX10_1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
81+
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm0, %xmm3
82+
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm3, %xmm3
83+
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm3, %ecx
84+
; CHECK-AVX10_1-NEXT: vucomisd %xmm0, %xmm0
85+
; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx
86+
; CHECK-AVX10_1-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0
87+
; CHECK-AVX10_1-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
88+
; CHECK-AVX10_1-NEXT: retq
89+
;
90+
; CHECK-AVX10_2-LABEL: foo_avx10.2:
91+
; CHECK-AVX10_2: # %bb.0:
92+
; CHECK-AVX10_2-NEXT: vcvttpd2dqs %zmm0, %ymm0
93+
; CHECK-AVX10_2-NEXT: retq
94+
%x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> %f)
95+
ret <8 x i32> %x
96+
}
97+

0 commit comments

Comments
 (0)