|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| 2 | +; RUN: opt -mattr=+neon,+f8f32mm < %s -passes=msan -S | FileCheck %s |
| 3 | +; |
| 4 | +; Forked from llvm/test/CodeGen/AArch64/aarch64-matmul-fp32.ll |
| 5 | +; |
| 6 | +; Strictly handled: |
| 7 | +; - llvm.aarch64.neon.fmmla.v4f32.v16i8 |
| 8 | +; |
| 9 | +; Heuristically handled: (none) |
| 10 | + |
| 11 | +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" |
| 12 | +target triple = "aarch64--linux-android9001" |
| 13 | + |
; Test function: forwards %r, %a and %b unchanged to the
; llvm.aarch64.neon.fmmla.v4f32.v16i8 intrinsic and returns its result.
;
; The autogenerated assertions inside the function expect the msan pass to:
;   1. load one shadow value per parameter from @__msan_param_tls (at its base
;      and at byte offsets 16 and 32),
;   2. bitcast each shadow to i128, compare it against zero, and OR the three
;      comparison results together,
;   3. branch to a block that calls @__msan_warning_noreturn when the combined
;      result is true,
;   4. otherwise perform the original intrinsic call with the original operands
;      and store an all-zero <4 x i32> shadow to @__msan_retval_tls before
;      returning.
;
; The assertion lines are tool-generated (utils/update_test_checks.py, as stated
; at the top of the file); regenerate them instead of editing them by hand.
| 14 | +define <4 x float> @fmmla.v4f32.v16i8(<4 x float> %r, <16 x i8> %a, <16 x i8> %b) sanitize_memory { |
| 15 | +; CHECK-LABEL: define <4 x float> @fmmla.v4f32.v16i8( |
| 16 | +; CHECK-SAME: <4 x float> [[R:%.*]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] { |
| 17 | +; CHECK-NEXT: [[ENTRY:.*:]] |
| 18 | +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 |
| 19 | +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr getelementptr (i8, ptr @__msan_param_tls, i64 16), align 8 |
| 20 | +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr getelementptr (i8, ptr @__msan_param_tls, i64 32), align 8 |
| 21 | +; CHECK-NEXT: call void @llvm.donothing() |
| 22 | +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 |
| 23 | +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 |
| 24 | +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 |
| 25 | +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 |
| 26 | +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] |
| 27 | +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 |
| 28 | +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0 |
| 29 | +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] |
| 30 | +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1:![0-9]+]] |
| 31 | +; CHECK: [[BB6]]: |
| 32 | +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]] |
| 33 | +; CHECK-NEXT: unreachable |
| 34 | +; CHECK: [[BB7]]: |
| 35 | +; CHECK-NEXT: [[VFMMLA1_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.fmmla.v4f32.v16i8(<4 x float> [[R]], <16 x i8> [[A]], <16 x i8> [[B]]) |
| 36 | +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 |
| 37 | +; CHECK-NEXT: ret <4 x float> [[VFMMLA1_I]] |
| 38 | +; |
| 39 | +entry: |
; Review note: attribute group #3 is referenced on the call below, but no
; definition of it is visible in this file. Presumably it is a leftover from the
; CodeGen test this file was forked from -- TODO confirm it is intentional.
| 40 | + %vfmmla1.i = tail call <4 x float> @llvm.aarch64.neon.fmmla.v4f32.v16i8(<4 x float> %r, <16 x i8> %a, <16 x i8> %b) #3 |
| 41 | + ret <4 x float> %vfmmla1.i |
| 42 | +} |
| 43 | +;. |
| 44 | +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} |
| 45 | +;. |
0 commit comments