From 7de09f4fab19913a02576ec5c3da37a1398c80ae Mon Sep 17 00:00:00 2001 From: Nathan Corbyn Date: Mon, 13 Oct 2025 14:54:57 +0100 Subject: [PATCH 1/2] [Matrix] Add tests identifying GVN and DSE opportunities for matrix store / load intrinsics --- .../Analysis/BasicAA/matrix-intrinsics.ll | 43 +++ .../DeadStoreElimination/matrix-intrinsics.ll | 305 ++++++++++++++++++ llvm/test/Transforms/GVN/matrix-intrinsics.ll | 85 +++++ 3 files changed, 433 insertions(+) create mode 100644 llvm/test/Analysis/BasicAA/matrix-intrinsics.ll create mode 100644 llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll create mode 100644 llvm/test/Transforms/GVN/matrix-intrinsics.ll diff --git a/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll b/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll new file mode 100644 index 0000000000000..1dfe4111ea3f7 --- /dev/null +++ b/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll @@ -0,0 +1,43 @@ +; RUN: opt %s -aa-pipeline=basic-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s + +; BasicAA should prove that loads from sufficiently large static offsets +; don't overlap with matrix loads with a statically known size. 
+ +define <8 x double> @non_overlapping_strided_load(ptr %src) { +entry: + %src.offset = getelementptr inbounds double, ptr %src, i32 12 + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) + %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) + %s = fadd <8 x double> %l, %l.2 + ret <8 x double> %s +} + +; CHECK-LABEL: Function: non_overlapping_strided_load: +; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) +; CHECK: NoModRef: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) +; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) +; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) +; CHECK: NoModRef: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) +; CHECK: Just Ref: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void 
@llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) + +define <8 x double> @overlapping_strided_load(ptr %src) { +entry: + %src.offset = getelementptr inbounds double, ptr %src, i32 11 + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) + %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) + %s = fadd <8 x double> %l, %l.2 + ret <8 x double> %s +} + +; CHECK-LABEL: Function: overlapping_strided_load: +; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) +; CHECK: NoModRef: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) +; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) +; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) +; CHECK: NoModRef: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) +; CHECK: Just Ref: %l.2 = call <8 x double> 
@llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) + +declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32) +declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32) diff --git a/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll new file mode 100644 index 0000000000000..ab063e9198b7a --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll @@ -0,0 +1,305 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=dse -S %s | FileCheck %s + +define void @dead_unstrided_store_non_matrix_load(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_unstrided_store_non_matrix_load( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = load double, ptr [[SRC]], align 8 +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + %l = load double, ptr %src + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + ret void +} + +define void @live_unstrided_store_non_matrix_load(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @live_unstrided_store_non_matrix_load( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L_1:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call 
void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L_2:%.*]] = load double, ptr [[DST]], align 8 +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L_1]], ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + %l.1 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + %l.2 = load double, ptr %dst + call void @llvm.matrix.column.major.store(<8 x double> %l.1, ptr %dst, i32 4, i1 false, i32 4, i32 2) + ret void +} + +define void @dead_strided_store(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_strided_store( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 200, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 200, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 100, i1 false, i32 4, i32 2) + ret void +} + +define void @live_strided_store(ptr %ptr) { +; CHECK-LABEL: define void @live_strided_store( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[PTR]], 
i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[PTR]], i32 200, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[PTR]], i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %ptr, i32 100, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %ptr, i32 200, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %ptr, i32 100, i1 false, i32 4, i32 2) + ret void +} + +define void @dead_strided_store_non_matrix_load(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_strided_store_non_matrix_load( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L_1:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = load double, ptr [[SRC]], align 8 +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L_1]], ptr [[DST]], i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + %l.1 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2) + %l.2 = load double, ptr %src + call void @llvm.matrix.column.major.store(<8 x double> %l.1, ptr %dst, i32 100, i1 false, i32 4, i32 2) + ret void +} + +define void @live_strided_store_non_matrix_load(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @live_strided_store_non_matrix_load( +; CHECK-SAME: ptr noalias 
[[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L_1:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L_2:%.*]] = load double, ptr [[DST]], align 8 +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L_1]], ptr [[DST]], i32 100, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + %l.1 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2) + %l.2 = load double, ptr %dst + call void @llvm.matrix.column.major.store(<8 x double> %l.1, ptr %dst, i32 100, i1 false, i32 4, i32 2) + ret void +} + +define void @dead_dynamically_strided_store(ptr noalias %src, ptr noalias %dst, i32 %stride) { +; CHECK-LABEL: define void @dead_dynamically_strided_store( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[STRIDE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 %stride, i1 
false, i32 4, i32 2) + ret void +} + +define void @live_dynamically_strided_store(ptr %ptr, i32 %stride) { +; CHECK-LABEL: define void @live_dynamically_strided_store( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[STRIDE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[PTR]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[PTR]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[PTR]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %ptr, i32 %stride, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %ptr, i32 %stride, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %ptr, i32 %stride, i1 false, i32 4, i32 2) + ret void +} + +define void @dead_dynamically_strided_store_non_matrix_load(ptr noalias %src, ptr noalias %dst, i32 %stride) { +; CHECK-LABEL: define void @dead_dynamically_strided_store_non_matrix_load( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[STRIDE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L_1:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = load double, ptr [[SRC]], align 8 +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L_1]], ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + %l.1 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, 
i32 2) + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + %l.2 = load double, ptr %src + call void @llvm.matrix.column.major.store(<8 x double> %l.1, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + ret void +} + +define void @live_dynamically_strided_store_non_matrix_load(ptr noalias %src, ptr noalias %dst, i32 %stride) { +; CHECK-LABEL: define void @live_dynamically_strided_store_non_matrix_load( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[STRIDE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L_1:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L_2:%.*]] = load double, ptr [[DST]], align 8 +; CHECK-NEXT: ret void +; +entry: + %l.1 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + %l.2 = load double, ptr %dst + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + ret void +} + +define void @dead_unstrided_store(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_unstrided_store( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; 
CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2) + ret void +} + +define void @live_unstrided_store(ptr %ptr) { +; CHECK-LABEL: define void @live_unstrided_store( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[PTR]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[PTR]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[PTR]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %ptr, i32 4, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %ptr, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %ptr, i32 4, i1 false, i32 4, i32 2) + ret void +} + +define void @dead_non_matrix_store(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_non_matrix_store( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[DST_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 6 +; CHECK-NEXT: store double 4.200000e+01, ptr [[DST_OFFSET]], align 8 +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + %dst.offset = 
getelementptr inbounds double, ptr %src, i32 6 + store double 42.0, ptr %dst.offset + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2) + ret void +} + +define void @live_non_matrix_store(ptr %ptr) { +; CHECK-LABEL: define void @live_non_matrix_store( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[PTR_OFFSET:%.*]] = getelementptr inbounds double, ptr [[PTR]], i32 6 +; CHECK-NEXT: store double 4.200000e+01, ptr [[PTR_OFFSET]], align 8 +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[PTR]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[PTR]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + %ptr.offset = getelementptr inbounds double, ptr %ptr, i32 6 + store double 42.0, ptr %ptr.offset + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %ptr, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %ptr, i32 4, i1 false, i32 4, i32 2) + ret void +} + +define void @dead_matrix_store_non_matrix_overwrite_unstrided(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_matrix_store_non_matrix_overwrite_unstrided( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: store <8 x double> zeroinitializer, ptr [[DST]], align 64 +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 
false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + store <8 x double> zeroinitializer, ptr %dst + ret void +} + +define void @dead_matrix_store_non_matrix_overwrite_strided(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_matrix_store_non_matrix_overwrite_strided( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: store <16 x double> zeroinitializer, ptr [[DST]], align 128 +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 8, i1 false, i32 4, i32 2) + store <16 x double> zeroinitializer, ptr %dst + ret void +} + +define void @live_matrix_store_non_matrix_overwrite_unstrided(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @live_matrix_store_non_matrix_overwrite_unstrided( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[DST]], align 32 +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, 
i32 2) + store <4 x double> zeroinitializer, ptr %dst + ret void +} + +define void @live_matrix_store_non_matrix_overwrite_strided(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @live_matrix_store_non_matrix_overwrite_strided( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: store <8 x double> zeroinitializer, ptr [[DST]], align 64 +; CHECK-NEXT: ret void +; +entry: + call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 8, i1 false, i32 4, i32 2) + store <8 x double> zeroinitializer, ptr %dst + ret void +} + +declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32) +declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32) diff --git a/llvm/test/Transforms/GVN/matrix-intrinsics.ll b/llvm/test/Transforms/GVN/matrix-intrinsics.ll new file mode 100644 index 0000000000000..3c6e65f534b4d --- /dev/null +++ b/llvm/test/Transforms/GVN/matrix-intrinsics.ll @@ -0,0 +1,85 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=gvn -S %s | FileCheck %s + +define <8 x double> @redundant_unstrided_load(ptr %src) { +; CHECK-LABEL: define <8 x double> @redundant_unstrided_load( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 8 +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: 
call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L_2]] +; CHECK-NEXT: ret <8 x double> [[S]] +; +entry: + %src.offset = getelementptr inbounds double, ptr %src, i32 8 + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %src, i32 4, i1 false, i32 4, i32 2) + %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2) + %s = fadd <8 x double> %l, %l.2 + ret <8 x double> %s +} + +define <8 x double> @redundant_unstrided_load_non_matrix_store(ptr %src) { +; CHECK-LABEL: define <8 x double> @redundant_unstrided_load_non_matrix_store( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 8 +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: store double 4.200000e+01, ptr [[SRC]], align 8 +; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L_2]] +; CHECK-NEXT: ret <8 x double> [[S]] +; +entry: + %src.offset = getelementptr inbounds double, ptr %src, i32 8 + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2) + store double 42.0, ptr %src + %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2) + %s = fadd <8 x double> %l, %l.2 + ret <8 x double> %s +} + +define <8 x double> 
@redundant_strided_load(ptr %src) { +; CHECK-LABEL: define <8 x double> @redundant_strided_load( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 16 +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[SRC]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L_2]] +; CHECK-NEXT: ret <8 x double> [[S]] +; +entry: + %src.offset = getelementptr inbounds double, ptr %src, i32 16 + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) + %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) + %s = fadd <8 x double> %l, %l.2 + ret <8 x double> %s +} + +define <8 x double> @redundant_strided_load_non_matrix_store(ptr %src) { +; CHECK-LABEL: define <8 x double> @redundant_strided_load_non_matrix_store( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 16 +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: store double 4.200000e+01, ptr [[SRC]], align 8 +; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L_2]] +; CHECK-NEXT: ret <8 x double> [[S]] +; +entry: + %src.offset = getelementptr 
inbounds double, ptr %src, i32 16 + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) + store double 42.0, ptr %src + %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) + %s = fadd <8 x double> %l, %l.2 + ret <8 x double> %s +} + +declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32) +declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32) From 31e47b673ce2954c78ff9a151ef13d2924efe17e Mon Sep 17 00:00:00 2001 From: Nathan Corbyn Date: Wed, 22 Oct 2025 12:08:44 +0100 Subject: [PATCH 2/2] Update tests --- .../Analysis/BasicAA/matrix-intrinsics.ll | 29 ++--- .../DeadStoreElimination/matrix-intrinsics.ll | 85 +++++++++----- llvm/test/Transforms/GVN/matrix-intrinsics.ll | 107 +++++++++++++----- 3 files changed, 146 insertions(+), 75 deletions(-) diff --git a/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll b/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll index 1dfe4111ea3f7..1de8ab5d3e590 100644 --- a/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll +++ b/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll @@ -4,40 +4,27 @@ ; don't overlap with matrix loads with a statically known size. 
define <8 x double> @non_overlapping_strided_load(ptr %src) { +; CHECK-LABEL: Function: non_overlapping_strided_load: +; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) +; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) entry: %src.offset = getelementptr inbounds double, ptr %src, i32 12 %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) - %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) - %s = fadd <8 x double> %l, %l.2 - ret <8 x double> %s + ret <8 x double> %l } -; CHECK-LABEL: Function: non_overlapping_strided_load: +define <8 x double> @overlapping_strided_load(ptr %src) { +; CHECK-LABEL: Function: overlapping_strided_load: ; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) -; CHECK: NoModRef: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) ; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) -; CHECK: Just Mod: 
call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) -; CHECK: NoModRef: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) -; CHECK: Just Ref: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) - -define <8 x double> @overlapping_strided_load(ptr %src) { +; entry: %src.offset = getelementptr inbounds double, ptr %src, i32 11 %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) - %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) - %s = fadd <8 x double> %l, %l.2 - ret <8 x double> %s + ret <8 x double> %l } -; CHECK-LABEL: Function: overlapping_strided_load: -; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) -; CHECK: NoModRef: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) -; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> 
@llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) -; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) -; CHECK: NoModRef: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) -; CHECK: Just Ref: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) - declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32) declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32) diff --git a/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll index ab063e9198b7a..ae3c7464656df 100644 --- a/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll @@ -10,9 +10,9 @@ define void @dead_unstrided_store_non_matrix_load(ptr noalias %src, ptr noalias ; CHECK-NEXT: ret void ; entry: - call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) %l = load double, ptr %src - call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) ret void } @@ -28,9 +28,9 
@@ define void @live_unstrided_store_non_matrix_load(ptr noalias %src, ptr noalias ; entry: %l.1 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) - call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) %l.2 = load double, ptr %dst - call void @llvm.matrix.column.major.store(<8 x double> %l.1, ptr %dst, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l.1, ptr %dst, i32 4, i1 false, i32 4, i32 2) ret void } @@ -44,9 +44,9 @@ define void @dead_strided_store(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: ret void ; entry: - call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2) %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 200, i1 false, i32 4, i32 2) - call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 100, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %dst, i32 100, i1 false, i32 4, i32 2) ret void } @@ -60,9 +60,9 @@ define void @live_strided_store(ptr %ptr) { ; CHECK-NEXT: ret void ; entry: - call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %ptr, i32 100, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %ptr, i32 100, i1 false, i32 4, i32 2) %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %ptr, i32 200, i1 false, i32 4, i32 2) - call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %ptr, i32 100, i1 false, i32 4, i32 2) + call void 
@llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %ptr, i32 100, i1 false, i32 4, i32 2) ret void } @@ -78,9 +78,9 @@ define void @dead_strided_store_non_matrix_load(ptr noalias %src, ptr noalias %d ; entry: %l.1 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) - call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2) %l.2 = load double, ptr %src - call void @llvm.matrix.column.major.store(<8 x double> %l.1, ptr %dst, i32 100, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l.1, ptr %dst, i32 100, i1 false, i32 4, i32 2) ret void } @@ -96,9 +96,9 @@ define void @live_strided_store_non_matrix_load(ptr noalias %src, ptr noalias %d ; entry: %l.1 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) - call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2) %l.2 = load double, ptr %dst - call void @llvm.matrix.column.major.store(<8 x double> %l.1, ptr %dst, i32 100, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l.1, ptr %dst, i32 100, i1 false, i32 4, i32 2) ret void } @@ -112,9 +112,9 @@ define void @dead_dynamically_strided_store(ptr noalias %src, ptr noalias %dst, ; CHECK-NEXT: ret void ; entry: - call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) %l = call <8 x double> 
@llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) - call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) ret void } @@ -128,9 +128,9 @@ define void @live_dynamically_strided_store(ptr %ptr, i32 %stride) { ; CHECK-NEXT: ret void ; entry: - call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %ptr, i32 %stride, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %ptr, i32 %stride, i1 false, i32 4, i32 2) %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %ptr, i32 %stride, i1 false, i32 4, i32 2) - call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %ptr, i32 %stride, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %ptr, i32 %stride, i1 false, i32 4, i32 2) ret void } @@ -146,9 +146,9 @@ define void @dead_dynamically_strided_store_non_matrix_load(ptr noalias %src, pt ; entry: %l.1 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) - call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) %l.2 = load double, ptr %src - call void @llvm.matrix.column.major.store(<8 x double> %l.1, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l.1, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) ret void } @@ -163,9 +163,9 @@ define void @live_dynamically_strided_store_non_matrix_load(ptr noalias %src, pt ; entry: %l.1 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) - 
call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) %l.2 = load double, ptr %dst - call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2) ret void } @@ -179,9 +179,9 @@ define void @dead_unstrided_store(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: ret void ; entry: - call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2) %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) - call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2) ret void } @@ -195,9 +195,9 @@ define void @live_unstrided_store(ptr %ptr) { ; CHECK-NEXT: ret void ; entry: - call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %ptr, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr %ptr, i32 4, i1 false, i32 4, i32 2) %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %ptr, i32 4, i1 false, i32 4, i32 2) - call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %ptr, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %ptr, i32 4, i1 false, i32 4, i32 2) ret void } @@ -215,7 +215,7 @@ entry: %dst.offset = getelementptr inbounds double, ptr %src, i32 6 store 
double 42.0, ptr %dst.offset %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) - call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2) ret void } @@ -233,7 +233,7 @@ entry: %ptr.offset = getelementptr inbounds double, ptr %ptr, i32 6 store double 42.0, ptr %ptr.offset %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %ptr, i32 4, i1 false, i32 4, i32 2) - call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %ptr, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %ptr, i32 4, i1 false, i32 4, i32 2) ret void } @@ -301,5 +301,38 @@ entry: ret void } +define void @dead_matrix_store_dimension_change(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @dead_matrix_store_dimension_change( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v9f64.i32(<9 x double> zeroinitializer, ptr [[DST]], i32 3, i1 false, i32 3, i32 3) +; CHECK-NEXT: ret void +; +entry: + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 8, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v9f64.i32(<9 x double> zeroinitializer, ptr %dst, i32 3, i1 false, i32 3, i32 3) + ret void +} + +define void @live_matrix_store_dimension_change(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: 
define void @live_matrix_store_dimension_change( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 8, i1 false, i32 4, i32 2) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v9f64.i32(<9 x double> zeroinitializer, ptr [[DST]], i32 3, i1 false, i32 3, i32 3) +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: ret void +; +entry: + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 8, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v9f64.i32(<9 x double> zeroinitializer, ptr %dst, i32 3, i1 false, i32 3, i32 3) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2) + ret void +} + declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32) +declare void @llvm.matrix.column.major.store.v9f64.i32(<9 x double>, ptr, i32, i1, i32, i32) declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32) diff --git a/llvm/test/Transforms/GVN/matrix-intrinsics.ll b/llvm/test/Transforms/GVN/matrix-intrinsics.ll index 3c6e65f534b4d..78dbfe1ef6bd8 100644 --- a/llvm/test/Transforms/GVN/matrix-intrinsics.ll +++ b/llvm/test/Transforms/GVN/matrix-intrinsics.ll @@ -1,85 +1,136 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=gvn -S %s | FileCheck %s -define <8 x double> @redundant_unstrided_load(ptr %src) { -; CHECK-LABEL: define <8 x double> @redundant_unstrided_load( +define void @redundant_unstrided_load(ptr %src) { +; CHECK-LABEL: define void @redundant_unstrided_load( ; CHECK-SAME: ptr [[SRC:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 8 ; 
CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2) ; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) ; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2) -; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L_2]] -; CHECK-NEXT: ret <8 x double> [[S]] +; CHECK-NEXT: call void @use(<8 x double> [[L]]) +; CHECK-NEXT: call void @use(<8 x double> [[L_2]]) +; CHECK-NEXT: ret void ; entry: %src.offset = getelementptr inbounds double, ptr %src, i32 8 %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2) - call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %src, i32 4, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 4, i1 false, i32 4, i32 2) %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2) - %s = fadd <8 x double> %l, %l.2 - ret <8 x double> %s + call void @use(<8 x double> %l) + call void @use(<8 x double> %l.2) + ret void } -define <8 x double> @redundant_unstrided_load_non_matrix_store(ptr %src) { -; CHECK-LABEL: define <8 x double> @redundant_unstrided_load_non_matrix_store( +define void @redundant_unstrided_load_non_matrix_store(ptr %src) { +; CHECK-LABEL: define void @redundant_unstrided_load_non_matrix_store( ; CHECK-SAME: ptr [[SRC:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 8 +; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 1 ; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2) ; CHECK-NEXT: store double 4.200000e+01, ptr [[SRC]], align 8 ; 
CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2) -; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L_2]] -; CHECK-NEXT: ret <8 x double> [[S]] +; CHECK-NEXT: call void @use(<8 x double> [[L]]) +; CHECK-NEXT: call void @use(<8 x double> [[L_2]]) +; CHECK-NEXT: ret void ; entry: - %src.offset = getelementptr inbounds double, ptr %src, i32 8 + %src.offset = getelementptr inbounds double, ptr %src, i32 1 %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2) store double 42.0, ptr %src %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2) - %s = fadd <8 x double> %l, %l.2 - ret <8 x double> %s + call void @use(<8 x double> %l) + call void @use(<8 x double> %l.2) + ret void } -define <8 x double> @redundant_strided_load(ptr %src) { -; CHECK-LABEL: define <8 x double> @redundant_strided_load( +define void @redundant_strided_load(ptr %src) { +; CHECK-LABEL: define void @redundant_strided_load( ; CHECK-SAME: ptr [[SRC:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 16 ; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2) ; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[SRC]], i32 8, i1 false, i32 4, i32 2) ; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2) -; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L_2]] -; CHECK-NEXT: ret <8 x double> [[S]] +; CHECK-NEXT: call void @use(<8 x double> [[L]]) +; CHECK-NEXT: call void @use(<8 x double> [[L_2]]) +; CHECK-NEXT: ret void ; entry: %src.offset = getelementptr inbounds double, ptr %src, i32 16 %l = call <8 x double> 
@llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) - call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) + call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) - %s = fadd <8 x double> %l, %l.2 - ret <8 x double> %s + call void @use(<8 x double> %l) + call void @use(<8 x double> %l.2) + ret void + } -define <8 x double> @redundant_strided_load_non_matrix_store(ptr %src) { -; CHECK-LABEL: define <8 x double> @redundant_strided_load_non_matrix_store( +define void @redundant_strided_load_non_matrix_store(ptr %src) { +; CHECK-LABEL: define void @redundant_strided_load_non_matrix_store( ; CHECK-SAME: ptr [[SRC:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 16 ; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2) ; CHECK-NEXT: store double 4.200000e+01, ptr [[SRC]], align 8 ; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2) -; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L_2]] -; CHECK-NEXT: ret <8 x double> [[S]] +; CHECK-NEXT: call void @use(<8 x double> [[L]]) +; CHECK-NEXT: call void @use(<8 x double> [[L_2]]) +; CHECK-NEXT: ret void ; entry: %src.offset = getelementptr inbounds double, ptr %src, i32 16 %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) store double 42.0, ptr %src %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) - %s = fadd <8 x double> %l, %l.2 - ret <8 x double> %s + call void @use(<8 x double> %l) + call void @use(<8 x double> %l.2) 
+ ret void +} + +define void @repeat_load_dimension_change_project(ptr %src) { +; CHECK-LABEL: define void @repeat_load_dimension_change_project( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L_2:%.*]] = call <9 x double> @llvm.matrix.column.major.load.v9f64.i32(ptr [[SRC]], i32 3, i1 false, i32 3, i32 3) +; CHECK-NEXT: [[L_3:%.*]] = shufflevector <9 x double> [[L_2]], <9 x double> zeroinitializer, <8 x i32> +; CHECK-NEXT: call void @use(<8 x double> [[L]]) +; CHECK-NEXT: call void @use(<8 x double> [[L_3]]) +; CHECK-NEXT: ret void +; +entry: + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + %l.2 = call <9 x double> @llvm.matrix.column.major.load.v9f64.i32(ptr %src, i32 3, i1 false, i32 3, i32 3) + %l.3 = shufflevector <9 x double> %l.2, <9 x double> zeroinitializer, <8 x i32> + call void @use(<8 x double> %l) + call void @use(<8 x double> %l.3) + ret void +} + +define void @repeat_load_dimension_change_shuffle(ptr %src) { +; CHECK-LABEL: define void @repeat_load_dimension_change_shuffle( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2) +; CHECK-NEXT: [[L_2:%.*]] = call <9 x double> @llvm.matrix.column.major.load.v9f64.i32(ptr [[SRC]], i32 3, i1 false, i32 3, i32 3) +; CHECK-NEXT: [[L_3:%.*]] = shufflevector <9 x double> [[L_2]], <9 x double> zeroinitializer, <8 x i32> +; CHECK-NEXT: call void @use(<8 x double> [[L]]) +; CHECK-NEXT: call void @use(<8 x double> [[L_3]]) +; CHECK-NEXT: ret void +; +entry: + %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2) + %l.2 = call <9 x double> @llvm.matrix.column.major.load.v9f64.i32(ptr %src, i32 3, i1 false, i32 
3, i32 3) + %l.3 = shufflevector <9 x double> %l.2, <9 x double> zeroinitializer, <8 x i32> + call void @use(<8 x double> %l) + call void @use(<8 x double> %l.3) + ret void } declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32) +declare <9 x double> @llvm.matrix.column.major.load.v9f64.i32(ptr, i32, i1, i32, i32) declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32) +declare void @use(<8 x double>)