Skip to content

Commit 1798672

Browse files
committed
Refactor and cleanup tests
Change all constants to splats. Remove const_2pow51_times_2, as it folds early. Remove unnecessary acc masks. Change numbered vars to more readable names. Remove attributes and use RUN lines. Remove dso_local/noundef/local_unnamed_addr.
1 parent 6338a61 commit 1798672

File tree

1 file changed

+63
-189
lines changed

1 file changed

+63
-189
lines changed
Lines changed: 63 additions & 189 deletions
Original file line numberDiff line numberDiff line change
@@ -1,217 +1,91 @@
1-
; RUN: llc < %s -O1 -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64
1+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avxifma | FileCheck %s --check-prefixes=X64,AVX
2+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512ifma | FileCheck %s --check-prefixes=X64,AVX512,AVX512-NOVL
3+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=X64,AVX512,AVX512VL
24

35
; 67108863 == (1 << 26) - 1
46
; 4503599627370496 == (1 << 52)
57
; 4503599627370495 == (1 << 52) - 1
68

7-
define dso_local <8 x i64> @test_512_combine_evex(<8 x i64> noundef %0, <8 x i64> noundef %1, <8 x i64> noundef %2) local_unnamed_addr #0 {
8-
; X64-LABEL: test_512_combine_evex:
9-
; X64: # %bb.0:
10-
; X64-NEXT: vpbroadcastq {{.*#+}} zmm3 = [67108863,67108863,67108863,67108863,67108863,67108863,67108863,67108863]
11-
; X64-NEXT: vpandq %zmm3, %zmm0, %zmm0
12-
; X64-NEXT: vpandq %zmm3, %zmm1, %zmm1
13-
; X64-NEXT: vpandq %zmm3, %zmm2, %zmm2
14-
; X64-NOT: vpmul
15-
; X64-NOT: vpadd
16-
; X64-NEXT: vpmadd52luq %zmm1, %zmm2, %zmm0
17-
; X64-NEXT: retq
18-
%4 = and <8 x i64> %0, splat (i64 67108863)
19-
%5 = and <8 x i64> %1, splat (i64 67108863)
20-
%6 = and <8 x i64> %2, splat (i64 67108863)
21-
%7 = mul nuw nsw <8 x i64> %5, %4
22-
%8 = add nuw nsw <8 x i64> %7, %6
23-
ret <8 x i64> %8
24-
}
25-
26-
define dso_local <8 x i64> @test_512_no_combine_evex_v2(<8 x i64> noundef %0, <8 x i64> noundef %1, <8 x i64> noundef %2) local_unnamed_addr #0 {
27-
; X64-LABEL: test_512_no_combine_evex_v2:
28-
; X64-NOT: vpmadd52luq
29-
; X64: retq
30-
%4 = and <8 x i64> %0, splat (i64 4503599627370495)
31-
%5 = and <8 x i64> %1, splat (i64 4503599627370495)
32-
%6 = and <8 x i64> %2, splat (i64 4503599627370495)
33-
%7 = mul nuw nsw <8 x i64> %5, %4
34-
%8 = add nuw nsw <8 x i64> %7, %6
35-
ret <8 x i64> %8
9+
define <8 x i64> @test_512_combine(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
10+
%x_masked = and <8 x i64> %x, splat (i64 67108863)
11+
%y_masked = and <8 x i64> %y, splat (i64 67108863)
12+
%mul = mul nuw nsw <8 x i64> %x_masked, %y_masked
13+
%res = add nuw nsw <8 x i64> %mul, %z
14+
ret <8 x i64> %res
3615
}
3716

38-
define dso_local noundef <8 x i64> @test_512_no_combine_evex(<8 x i64> noundef %0, <8 x i64> noundef %1, <8 x i64> noundef %2) local_unnamed_addr #0 {
39-
; X64-LABEL: test_512_no_combine_evex:
40-
; X64: # %bb.0:
41-
; X64-NOT: vpmadd52
42-
; X64-NEXT: vpmullq %zmm0, %zmm1, %zmm0
43-
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0
44-
; X64-NEXT: retq
45-
%4 = mul <8 x i64> %1, %0
46-
%5 = add <8 x i64> %4, %2
47-
ret <8 x i64> %5
17+
define <8 x i64> @test_512_combine_v2(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
18+
%x_masked = and <8 x i64> %x, splat (i64 1125899906842623) ; (1 << 50) - 1
19+
%y_masked = and <8 x i64> %y, splat (i64 3)
20+
%mul = mul nuw nsw <8 x i64> %x_masked, %y_masked
21+
%res = add nuw nsw <8 x i64> %mul, %z
22+
ret <8 x i64> %res
4823
}
4924

50-
define dso_local <4 x i64> @test_256_combine_evex(<4 x i64> noundef %0, <4 x i64> noundef %1, <4 x i64> noundef %2) local_unnamed_addr #1 {
51-
; X64-LABEL: test_256_combine_evex:
52-
; X64: # %bb.0:
53-
; X64-NEXT: vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863]
54-
; X64-NEXT: vpand %ymm3, %ymm0, %ymm0
55-
; X64-NEXT: vpand %ymm3, %ymm1, %ymm1
56-
; X64-NEXT: vpand %ymm3, %ymm2, %ymm2
57-
; X64-NOT: vpmul
58-
; X64-NOT: vpadd
59-
; X64-NEXT: vpmadd52luq %ymm1, %ymm2, %ymm0
60-
; X64-NEXT: retq
61-
%4 = and <4 x i64> %0, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
62-
%5 = and <4 x i64> %1, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
63-
%6 = and <4 x i64> %2, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
64-
%7 = mul nuw nsw <4 x i64> %5, %4
65-
%8 = add nuw nsw <4 x i64> %7, %6
66-
ret <4 x i64> %8
25+
define <8 x i64> @test_512_no_combine(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
26+
%x_masked = and <8 x i64> %x, splat (i64 4503599627370495)
27+
%y_masked = and <8 x i64> %y, splat (i64 4503599627370495)
28+
%mul = mul nuw nsw <8 x i64> %x_masked, %y_masked
29+
%res = add nuw nsw <8 x i64> %mul, %z
30+
ret <8 x i64> %res
6731
}
6832

69-
define dso_local noundef <4 x i64> @test_256_no_combine_evex(<4 x i64> noundef %0, <4 x i64> noundef %1, <4 x i64> noundef %2) local_unnamed_addr #1 {
70-
; X64-LABEL: test_256_no_combine_evex:
71-
; X64: # %bb.0:
72-
; X64-NOT: vpmadd52
73-
; X64-NEXT: vpmullq %ymm0, %ymm1, %ymm0
74-
; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0
75-
; X64-NEXT: retq
76-
%4 = mul <4 x i64> %1, %0
77-
%5 = add <4 x i64> %4, %2
78-
ret <4 x i64> %5
33+
define <8 x i64> @test_512_no_combine_v2(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
34+
%mul = mul <8 x i64> %x, %y
35+
%res = add <8 x i64> %mul, %z
36+
ret <8 x i64> %res
7937
}
8038

81-
define dso_local <4 x i64> @test_256_combine_vex(<4 x i64> noundef %0, <4 x i64> noundef %1, <4 x i64> noundef %2) local_unnamed_addr #2 {
82-
; X64-LABEL: test_256_combine_vex:
83-
; X64: # %bb.0:
84-
; X64-NEXT: vpbroadcastq {{.*#+}} ymm3 = [67108863,67108863,67108863,67108863]
85-
; X64-NEXT: vpand %ymm3, %ymm0, %ymm0
86-
; X64-NEXT: vpand %ymm3, %ymm1, %ymm1
87-
; X64-NEXT: vpand %ymm3, %ymm2, %ymm2
88-
; X64-NOT: vpmul
89-
; X64-NOT: vpadd
90-
; X64-NEXT: {vex} vpmadd52luq %ymm1, %ymm2, %ymm0
91-
; X64-NEXT: retq
92-
%4 = and <4 x i64> %0, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
93-
%5 = and <4 x i64> %1, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
94-
%6 = and <4 x i64> %2, <i64 67108863, i64 67108863, i64 67108863, i64 67108863>
95-
%7 = mul nuw nsw <4 x i64> %5, %4
96-
%8 = add nuw nsw <4 x i64> %7, %6
97-
ret <4 x i64> %8
39+
define <4 x i64> @test_256_combine(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
40+
%x_masked = and <4 x i64> %x, splat(i64 67108863)
41+
%y_masked = and <4 x i64> %y, splat(i64 67108863)
42+
%mul = mul nuw nsw <4 x i64> %x_masked, %y_masked
43+
%res = add nuw nsw <4 x i64> %z, %mul
44+
ret <4 x i64> %res
9845
}
9946

100-
define dso_local noundef <4 x i64> @test_256_no_combine_vex(<4 x i64> noundef %0, <4 x i64> noundef %1, <4 x i64> noundef %2) local_unnamed_addr #2 {
101-
; X64-LABEL: test_256_no_combine_vex:
102-
; X64: # %bb.0:
103-
; X64-NOT: vpmadd52
104-
; X64-NEXT: vpmullq %ymm0, %ymm1, %ymm0
105-
; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0
106-
; X64-NEXT: retq
107-
%4 = mul <4 x i64> %1, %0
108-
%5 = add <4 x i64> %4, %2
109-
ret <4 x i64> %5
47+
define <4 x i64> @test_256_no_combine(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
48+
%mul = mul <4 x i64> %x, %y
49+
%res = add <4 x i64> %mul, %z
50+
ret <4 x i64> %res
11051
}
11152

112-
define i64 @scalar_no_ifma(i64 %a, i64 %b, i64 %acc) #0 {
113-
; X64-LABEL: scalar_no_ifma:
114-
; X64-NOT: vpmadd52
115-
; X64-NOT: vpmullq
116-
; X64: imulq
117-
; X64: ret
118-
entry:
119-
%mul = mul i64 %a, %b
120-
%res = add i64 %acc, %mul
121-
ret i64 %res
53+
define <2 x i64> @test_128_combine(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
54+
%x_masked = and <2 x i64> %x, splat (i64 67108863)
55+
%y_masked = and <2 x i64> %y, splat (i64 67108863)
56+
%mul = mul <2 x i64> %x_masked, %y_masked
57+
%res = add <2 x i64> %z, %mul
58+
ret <2 x i64> %res
12259
}
12360

124-
define <8 x i64> @mixed_width_too_wide(<8 x i64> %a, <8 x i64> %b, <8 x i64> %acc) #0 {
125-
; X64-LABEL: mixed_width_too_wide:
126-
; X64-NOT: vpmadd52luq
127-
; X64: vpmullq
128-
; X64: ret
129-
entry:
130-
; 40-bit and 13-bit, product fits < 2^53 (NOT < 2^52)
131-
%a40 = and <8 x i64> %a, splat (i64 1099511627775)
132-
%b13 = and <8 x i64> %b, splat (i64 8191)
133-
%mul = mul <8 x i64> %a40, %b13
134-
%res = add <8 x i64> %acc, %mul
135-
ret <8 x i64> %res
61+
; Sanity check: the vpmadd52 combine must not be applied to a single-element (effectively scalar) vector
62+
define <1 x i64> @test_scalar_no_ifma(<1 x i64> %x, <1 x i64> %y, <1 x i64> %z) {
63+
%mul = mul <1 x i64> %x, %y
64+
%res = add <1 x i64> %mul, %z
65+
ret <1 x i64> %res
13666
}
13767

138-
define <8 x i64> @zext32_inputs_not_safe(<8 x i32> %ai32, <8 x i32> %bi32, <8 x i64> %acc) #0 {
139-
; X64-LABEL: zext32_inputs_not_safe:
140-
; X64: vpmul
141-
; X64-NOT: vpmadd52luq
142-
; X64: ret
143-
entry:
144-
%a = zext <8 x i32> %ai32 to <8 x i64>
145-
%b = zext <8 x i32> %bi32 to <8 x i64>
146-
%mul = mul <8 x i64> %a, %b
147-
%res = add <8 x i64> %acc, %mul
148-
ret <8 x i64> %res
68+
define <8 x i64> @test_mixed_width_too_wide(<8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
69+
; Negative test: 40-bit x 13-bit operands give a product that is < 2^53 but NOT < 2^52, so it is too wide to combine into vpmadd52luq
70+
%x40 = and <8 x i64> %x, splat (i64 1099511627775)
71+
%y13 = and <8 x i64> %y, splat (i64 8191)
72+
%mul = mul <8 x i64> %x40, %y13
73+
%res = add <8 x i64> %z, %mul
74+
ret <8 x i64> %res ; return the sum (not %z) so the and/mul/add above are live and actually checked
14975
}
15076

151-
define <8 x i64> @const_2pow51_times_2(<8 x i64> %acc) #0 {
152-
; X64-LABEL: const_2pow51_times_2:
153-
; X64-NOT: vpmadd52luq
154-
; X64: vpaddq
155-
; X64: ret
156-
entry:
157-
; product = 2^52
158-
%mul = mul <8 x i64> splat(i64 2251799813685248), splat(i64 2)
159-
%res = add <8 x i64> %acc, %mul ; needs full low-64 add
77+
define <8 x i64> @test_zext32_inputs_not_safe(<8 x i32> %xi32, <8 x i32> %yi32, <8 x i64> %z) {
78+
%x = zext <8 x i32> %xi32 to <8 x i64>
79+
%y = zext <8 x i32> %yi32 to <8 x i64>
80+
%mul = mul <8 x i64> %x, %y
81+
%res = add <8 x i64> %z, %mul
16082
ret <8 x i64> %res
16183
}
16284

163-
define <4 x i64> @safe_ifma_v4(<4 x i64> %a, <4 x i64> %b, <4 x i64> %acc) #1 {
164-
; X64-LABEL: safe_ifma_v4:
165-
; X64: vpmadd52luq
166-
; X64-NOT: vpmullq
167-
; X64: ret
168-
entry:
169-
%a26 = and <4 x i64> %a, splat (i64 67108863)
170-
%b26 = and <4 x i64> %b, splat (i64 67108863)
171-
%mul = mul <4 x i64> %a26, %b26
172-
%res = add <4 x i64> %acc, %mul
173-
ret <4 x i64> %res
174-
}
175-
176-
define <2 x i64> @safe_ifma_v2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %acc) #1 {
177-
; X64-LABEL: safe_ifma_v2:
178-
; X64: vpmadd52luq
179-
; X64-NOT: vpmullq
180-
; X64: ret
181-
entry:
182-
%a26 = and <2 x i64> %a, splat (i64 67108863)
183-
%b26 = and <2 x i64> %b, splat (i64 67108863)
184-
%mul = mul <2 x i64> %a26, %b26
185-
%res = add <2 x i64> %acc, %mul
186-
ret <2 x i64> %res
187-
}
188-
189-
define <4 x i64> @v4_no_vl_fallback(<4 x i64> %a, <4 x i64> %b, <4 x i64> %acc) #0 {
190-
; X64-LABEL: v4_no_vl_fallback:
191-
; X64-NOT: vpmadd52luq
192-
; X64: pmul
193-
; X64: ret
194-
entry:
195-
%a26 = and <4 x i64> %a, splat (i64 67108863)
196-
%b26 = and <4 x i64> %b, splat (i64 67108863)
197-
%mul = mul <4 x i64> %a26, %b26
198-
%res = add <4 x i64> %acc, %mul
199-
ret <4 x i64> %res
200-
}
201-
202-
define <16 x i64> @v16_test_split(<16 x i64> %a, <16 x i64> %b, <16 x i64> %acc) #1 {
203-
; X64-LABEL: v16_test_split:
204-
; X64: vpmadd52luq
205-
; X64: vpmadd52luq
206-
; X64: ret
207-
entry:
208-
%a26 = and <16 x i64> %a, splat (i64 67108863)
209-
%b26 = and <16 x i64> %b, splat (i64 67108863)
210-
%mul = mul <16 x i64> %a26, %b26
211-
%res = add <16 x i64> %acc, %mul
85+
define <16 x i64> @test_1024_combine_split(<16 x i64> %x, <16 x i64> %y, <16 x i64> %z) {
86+
%x_masked = and <16 x i64> %x, splat (i64 67108863)
87+
%y_masked = and <16 x i64> %y, splat (i64 67108863)
88+
%mul = mul <16 x i64> %x_masked, %y_masked
89+
%res = add <16 x i64> %z, %mul
21290
ret <16 x i64> %res
21391
}
214-
215-
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "target-features"="+avx,+avx2,+avx512dq,+avx512f,+avx512ifma,-avx512vl,+cmov,+crc32,+evex512,+cx8,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" }
216-
attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "target-features"="+avx,+avx2,+avx512dq,+avx512f,+avx512ifma,+avx512vl,+cmov,+crc32,+cx8,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" }
217-
attributes #2 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "target-features"="+avx,+avx2,+avx512dq,+avx512f,+avx512vl,+avxifma,+cmov,+crc32,+cx8,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" }

0 commit comments

Comments
 (0)