Skip to content

Commit 9a17451

Browse files
authored
[NFCI][msan] Precommit tests for AVX-VNNI (llvm#153135)
The tests largely cover AVX-VNNI (Vector Neural Network Instructions): - vpdpbusd, vpdpbusds - vpdpwssd, vpdpwssds AVX-VNNI-INT8: - vpdpbssd, vpdpbssds - vpdpbsud, vpdpbsuds - vpdpbuud, vpdpbuuds AVX-VNNI-INT16: - vpdpwsud, vpdpwsuds - vpdpwusd, vpdpwusds - vpdpwuud, vpdpwuuds These instructions are currently heuristically handled (by OR'ing together the vectors). This is incorrect because: 1) multiplication by a zero should result in an initialized value 2) the addition is horizontal (within vectors, not "vertically" between vectors). Future work can improve the instrumentation by applying the updated handleVectorPmaddIntrinsic() from llvm#152941
1 parent 457b14c commit 9a17451

9 files changed

+4823
-0
lines changed

llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll

Lines changed: 773 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll

Lines changed: 1128 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll

Lines changed: 655 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics.ll

Lines changed: 698 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics-upgrade.ll

Lines changed: 327 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics.ll

Lines changed: 338 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -mattr=+avx512vnni,+avx512vl,+avxvnni -passes=msan -S | FileCheck %s
3+
4+
; Forked from llvm/test/CodeGen/X86/avx_vnni-intrinsics.ll
5+
;
6+
; Handled strictly: (none)
7+
;
8+
; Handled heuristically:
9+
; - llvm.x86.avx512.vpdpbusd.128
10+
; - llvm.x86.avx512.vpdpbusd.256
11+
; - llvm.x86.avx512.vpdpbusds.128
12+
; - llvm.x86.avx512.vpdpbusds.256
13+
; - llvm.x86.avx512.vpdpwssd.128
14+
; - llvm.x86.avx512.vpdpwssd.256
15+
; - llvm.x86.avx512.vpdpwssds.128
16+
; - llvm.x86.avx512.vpdpwssds.256
17+
18+
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
19+
target triple = "x86_64-unknown-linux-gnu"
20+
21+
declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>)
22+
23+
define <8 x i32>@test_int_x86_avx_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) sanitize_memory {
24+
; CHECK-LABEL: define <8 x i32> @test_int_x86_avx_vpdpbusd_256(
25+
; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]]) #[[ATTR1:[0-9]+]] {
26+
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
27+
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
28+
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
29+
; CHECK-NEXT: call void @llvm.donothing()
30+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
31+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[_MSPROP]], [[TMP3]]
32+
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
33+
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
34+
; CHECK-NEXT: ret <8 x i32> [[RES]]
35+
;
36+
%res = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
37+
ret <8 x i32> %res
38+
}
39+
40+
declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>)
41+
42+
define <4 x i32>@test_int_x86_avx_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) sanitize_memory {
43+
; CHECK-LABEL: define <4 x i32> @test_int_x86_avx_vpdpbusd_128(
44+
; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]]) #[[ATTR1]] {
45+
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
46+
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
47+
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
48+
; CHECK-NEXT: call void @llvm.donothing()
49+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
50+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], [[TMP3]]
51+
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
52+
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
53+
; CHECK-NEXT: ret <4 x i32> [[RES]]
54+
;
55+
%res = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
56+
ret <4 x i32> %res
57+
}
58+
59+
declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>)
60+
61+
define <8 x i32>@test_int_x86_avx_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) sanitize_memory {
62+
; CHECK-LABEL: define <8 x i32> @test_int_x86_avx_vpdpbusds_256(
63+
; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]]) #[[ATTR1]] {
64+
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
65+
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
66+
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
67+
; CHECK-NEXT: call void @llvm.donothing()
68+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
69+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[_MSPROP]], [[TMP3]]
70+
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
71+
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
72+
; CHECK-NEXT: ret <8 x i32> [[RES]]
73+
;
74+
%res = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
75+
ret <8 x i32> %res
76+
}
77+
78+
declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>)
79+
80+
define <4 x i32>@test_int_x86_avx_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) sanitize_memory {
81+
; CHECK-LABEL: define <4 x i32> @test_int_x86_avx_vpdpbusds_128(
82+
; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]]) #[[ATTR1]] {
83+
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
84+
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
85+
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
86+
; CHECK-NEXT: call void @llvm.donothing()
87+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
88+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], [[TMP3]]
89+
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
90+
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
91+
; CHECK-NEXT: ret <4 x i32> [[RES]]
92+
;
93+
%res = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
94+
ret <4 x i32> %res
95+
}
96+
97+
declare <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>)
98+
99+
define <8 x i32>@test_int_x86_avx_vpdpwssd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) sanitize_memory {
100+
; CHECK-LABEL: define <8 x i32> @test_int_x86_avx_vpdpwssd_256(
101+
; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]]) #[[ATTR1]] {
102+
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
103+
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
104+
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
105+
; CHECK-NEXT: call void @llvm.donothing()
106+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
107+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[_MSPROP]], [[TMP3]]
108+
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
109+
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
110+
; CHECK-NEXT: ret <8 x i32> [[RES]]
111+
;
112+
%res = call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
113+
ret <8 x i32> %res
114+
}
115+
116+
declare <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>)
117+
118+
define <4 x i32>@test_int_x86_avx_vpdpwssd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) sanitize_memory {
119+
; CHECK-LABEL: define <4 x i32> @test_int_x86_avx_vpdpwssd_128(
120+
; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]]) #[[ATTR1]] {
121+
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
122+
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
123+
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
124+
; CHECK-NEXT: call void @llvm.donothing()
125+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
126+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], [[TMP3]]
127+
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
128+
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
129+
; CHECK-NEXT: ret <4 x i32> [[RES]]
130+
;
131+
%res = call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
132+
ret <4 x i32> %res
133+
}
134+
135+
declare <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>)
136+
137+
define <8 x i32>@test_int_x86_avx_vpdpwssds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) sanitize_memory {
138+
; CHECK-LABEL: define <8 x i32> @test_int_x86_avx_vpdpwssds_256(
139+
; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]]) #[[ATTR1]] {
140+
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
141+
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
142+
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
143+
; CHECK-NEXT: call void @llvm.donothing()
144+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
145+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[_MSPROP]], [[TMP3]]
146+
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
147+
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
148+
; CHECK-NEXT: ret <8 x i32> [[RES]]
149+
;
150+
%res = call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
151+
ret <8 x i32> %res
152+
}
153+
154+
declare <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>)
155+
156+
define <4 x i32>@test_int_x86_avx_vpdpwssds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) sanitize_memory {
157+
; CHECK-LABEL: define <4 x i32> @test_int_x86_avx_vpdpwssds_128(
158+
; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]]) #[[ATTR1]] {
159+
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
160+
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
161+
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
162+
; CHECK-NEXT: call void @llvm.donothing()
163+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
164+
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], [[TMP3]]
165+
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
166+
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
167+
; CHECK-NEXT: ret <4 x i32> [[RES]]
168+
;
169+
%res = call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
170+
ret <4 x i32> %res
171+
}

0 commit comments

Comments
 (0)