Skip to content

Commit eb89c8a

Browse files
authored
[msan][NFCI] Add tests for the matrix multiplication intrinsics on Arm (llvm#174038)
Forked from corresponding files in llvm/test/CodeGen/AArch64
1 parent 670a68e commit eb89c8a

11 files changed

+1851
-0
lines changed

llvm/test/Instrumentation/MemorySanitizer/AArch64/aarch64-bf16-dotprod-intrinsics.ll

Lines changed: 503 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -mattr=+neon,+f8f16mm < %s -passes=msan -S | FileCheck %s
3+
;
4+
; Forked from llvm/test/CodeGen/AArch64/aarch64-matmul-fp16.ll
5+
;
6+
; Strictly handled:
7+
; - llvm.aarch64.neon.fmmla.v8f16.v16i8
8+
;
9+
; Heuristically handled: (none)
10+
11+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
12+
target triple = "aarch64--linux-android9001"
13+
14+
; Test input: a single sanitize_memory function that forwards its three
; arguments (%r, %a, %b) to llvm.aarch64.neon.fmmla.v8f16.v16i8 and returns
; the intrinsic's result.
;
; The autogenerated assertions inside the function expect the "strict" form of
; instrumentation listed in the file header:
;   - three values are loaded from @__msan_param_tls at byte offsets 0, 16 and
;     32, one per argument (presumably the argument shadows);
;   - each is bitcast to i128 and compared against zero, and the three results
;     are OR-ed together;
;   - if the combined result is true, control branches to a block that calls
;     @__msan_warning_noreturn and ends in unreachable;
;   - otherwise the intrinsic is called with the original operands and an
;     all-zero <8 x i16> is stored to @__msan_retval_tls before returning.
define <8 x half> @fmmla.v8f16.v16i8(<8 x half> %r, <16 x i8> %a, <16 x i8> %b) sanitize_memory {
15+
; CHECK-LABEL: define <8 x half> @fmmla.v8f16.v16i8(
16+
; CHECK-SAME: <8 x half> [[R:%.*]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
17+
; CHECK-NEXT: [[ENTRY:.*:]]
18+
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
19+
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr getelementptr (i8, ptr @__msan_param_tls, i64 16), align 8
20+
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr getelementptr (i8, ptr @__msan_param_tls, i64 32), align 8
21+
; CHECK-NEXT: call void @llvm.donothing()
22+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
23+
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
24+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
25+
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
26+
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
27+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
28+
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0
29+
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
30+
; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1:![0-9]+]]
31+
; CHECK: [[BB6]]:
32+
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]]
33+
; CHECK-NEXT: unreachable
34+
; CHECK: [[BB7]]:
35+
; CHECK-NEXT: [[VFMMLA1_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.fmmla.v8f16.v16i8(<8 x half> [[R]], <16 x i8> [[A]], <16 x i8> [[B]])
36+
; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
37+
; CHECK-NEXT: ret <8 x half> [[VFMMLA1_I]]
38+
;
39+
entry:
40+
; NOTE(review): attribute group #3 has no definition in the visible part of
; this file, and the expected output above shows the call without any
; attribute-group suffix - presumably a leftover from the forked CodeGen test;
; confirm before relying on it.
%vfmmla1.i = tail call <8 x half> @llvm.aarch64.neon.fmmla.v8f16.v16i8(<8 x half> %r, <16 x i8> %a, <16 x i8> %b) #3
41+
ret <8 x half> %vfmmla1.i
42+
}
43+
44+
;.
45+
; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
46+
;.
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -mattr=+neon,+f8f32mm < %s -passes=msan -S | FileCheck %s
3+
;
4+
; Forked from llvm/test/CodeGen/AArch64/aarch64-matmul-fp32.ll
5+
;
6+
; Strictly handled:
7+
; - llvm.aarch64.neon.fmmla.v4f32.v16i8
8+
;
9+
; Heuristically handled: (none)
10+
11+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
12+
target triple = "aarch64--linux-android9001"
13+
14+
; Test input: a single sanitize_memory function that forwards its three
; arguments (%r, %a, %b) to llvm.aarch64.neon.fmmla.v4f32.v16i8 and returns
; the intrinsic's result.
;
; The autogenerated assertions inside the function expect the "strict" form of
; instrumentation listed in the file header:
;   - three values are loaded from @__msan_param_tls at byte offsets 0, 16 and
;     32, one per argument (presumably the argument shadows);
;   - each is bitcast to i128 and compared against zero, and the three results
;     are OR-ed together;
;   - if the combined result is true, control branches to a block that calls
;     @__msan_warning_noreturn and ends in unreachable;
;   - otherwise the intrinsic is called with the original operands and an
;     all-zero <4 x i32> is stored to @__msan_retval_tls before returning.
define <4 x float> @fmmla.v4f32.v16i8(<4 x float> %r, <16 x i8> %a, <16 x i8> %b) sanitize_memory {
15+
; CHECK-LABEL: define <4 x float> @fmmla.v4f32.v16i8(
16+
; CHECK-SAME: <4 x float> [[R:%.*]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
17+
; CHECK-NEXT: [[ENTRY:.*:]]
18+
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
19+
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr getelementptr (i8, ptr @__msan_param_tls, i64 16), align 8
20+
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr getelementptr (i8, ptr @__msan_param_tls, i64 32), align 8
21+
; CHECK-NEXT: call void @llvm.donothing()
22+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
23+
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
24+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
25+
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
26+
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
27+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
28+
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0
29+
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
30+
; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1:![0-9]+]]
31+
; CHECK: [[BB6]]:
32+
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]]
33+
; CHECK-NEXT: unreachable
34+
; CHECK: [[BB7]]:
35+
; CHECK-NEXT: [[VFMMLA1_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.fmmla.v4f32.v16i8(<4 x float> [[R]], <16 x i8> [[A]], <16 x i8> [[B]])
36+
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
37+
; CHECK-NEXT: ret <4 x float> [[VFMMLA1_I]]
38+
;
39+
entry:
40+
; NOTE(review): attribute group #3 has no definition in the visible part of
; this file, and the expected output above shows the call without any
; attribute-group suffix - presumably a leftover from the forked CodeGen test;
; confirm before relying on it.
%vfmmla1.i = tail call <4 x float> @llvm.aarch64.neon.fmmla.v4f32.v16i8(<4 x float> %r, <16 x i8> %a, <16 x i8> %b) #3
41+
ret <4 x float> %vfmmla1.i
42+
}
43+
;.
44+
; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
45+
;.

0 commit comments

Comments
 (0)