-
Notifications
You must be signed in to change notification settings - Fork 15k
[Matrix] Add tests identifying GVN and DSE opportunities for matrix store / load intrinsics #163573
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Matrix] Add tests identifying GVN and DSE opportunities for matrix store / load intrinsics #163573
Conversation
|
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Nathan Corbyn (cofibrant) Changes: This patch adds several tests identifying potential opportunities for eliminating dead stores and redundant loads when using the `llvm.matrix.column.major.store.*` and `llvm.matrix.column.major.load.*` intrinsics. CC @fhahn Full diff: https://github.com/llvm/llvm-project/pull/163573.diff 3 Files Affected:
diff --git a/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll b/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll
new file mode 100644
index 0000000000000..71647f139725e
--- /dev/null
+++ b/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll
@@ -0,0 +1,81 @@
+; RUN: opt < %s -aa-pipeline=basic-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+; BasicAA should prove that loads from sufficiently large static offsets
+; don't overlap with matrix loads with a statically known size.
+
+define <8 x double> @non_overlapping_strided_load(ptr %src) {
+entry:
+  %src.offset = getelementptr inbounds double, ptr %src, i32 16
+  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+  call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
+  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+  %s = fadd <8 x double> %l, %l.2
+  ret <8 x double> %s
+}
+
+; CHECK-LABEL: Function: non_overlapping_strided_load:
+; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
+; CHECK: NoModRef: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+; CHECK: NoModRef: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+; CHECK: Just Ref: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
+
+define <8 x double> @non_overlapping_strided_load_i128(ptr %src) {
+entry:
+  %src.offset = getelementptr inbounds double, ptr %src, i128 u0x200000000
+  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 u0x100000000, i1 false, i32 4, i32 2)
+  call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 u0x100000000, i1 false, i32 4, i32 2)
+  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 u0x100000000, i1 false, i32 4, i32 2)
+  %s = fadd <8 x double> %l, %l.2
+  ret <8 x double> %s
+}
+
+; CHECK-LABEL: Function: non_overlapping_strided_load_i128
+; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2)
+; CHECK: NoModRef: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
+; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
+; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
+; CHECK: NoModRef: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
+; CHECK: Just Ref: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2)
+
+define <8 x double> @overlapping_strided_load(ptr %src) {
+entry:
+  %src.offset = getelementptr inbounds double, ptr %src, i32 15
+  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+  call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
+  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+  %s = fadd <8 x double> %l, %l.2
+  ret <8 x double> %s
+}
+
+; CHECK-LABEL: Function: overlapping_strided_load:
+; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
+; CHECK: NoModRef: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+; CHECK: NoModRef: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+; CHECK: Just Ref: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
+
+define <8 x double> @overlapping_strided_load_i128(ptr %src) {
+entry:
+  %src.offset = getelementptr inbounds double, ptr %src, i128 u0x100000000
+  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 u0x100000000, i1 false, i32 4, i32 2)
+  call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 u0x100000000, i1 false, i32 4, i32 2)
+  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 u0x100000000, i1 false, i32 4, i32 2)
+  %s = fadd <8 x double> %l, %l.2
+  ret <8 x double> %s
+}
+
+; CHECK-LABEL: Function: overlapping_strided_load_i128
+; CHECK: Just Ref: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2)
+; CHECK: NoModRef: %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
+; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
+; CHECK: Just Mod: call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2) <-> %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
+; CHECK: NoModRef: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2)
+; CHECK: Just Ref: %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr %src.offset, i128 4294967296, i1 false, i32 4, i32 2) <-> call void @llvm.matrix.column.major.store.v8f64.i128(<8 x double> %l, ptr %src, i128 4294967296, i1 false, i32 4, i32 2)
+
+declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
+declare <8 x double> @llvm.matrix.column.major.load.v8f64.i128(ptr, i128, i1, i32, i32)
+declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32)
+declare void @llvm.matrix.column.major.store.v8f64.i128(<8 x double>, ptr, i128, i1, i32, i32)
diff --git a/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll
new file mode 100644
index 0000000000000..7199006ccbeb0
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=dse -S < %s | FileCheck %s
+
+define void @dead_unstrided_store(ptr noalias %src, ptr noalias %dst) {
+; CHECK-LABEL: define void @dead_unstrided_store(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 4, i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 4, i1 false, i32 4, i32 2)
+; CHECK-NEXT: ret void
+;
+entry:
+ call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 4, i1 false, i32 4, i32 2)
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
+ call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 4, i1 false, i32 4, i32 2)
+ ret void
+}
+
+define void @dead_strided_store(ptr noalias %src, ptr noalias %dst) {
+; CHECK-LABEL: define void @dead_strided_store(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 100, i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 200, i1 false, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 100, i1 false, i32 4, i32 2)
+; CHECK-NEXT: ret void
+;
+entry:
+ call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 100, i1 false, i32 4, i32 2)
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 200, i1 false, i32 4, i32 2)
+ call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 100, i1 false, i32 4, i32 2)
+ ret void
+}
+
+define void @dead_dynamically_strided_store(ptr noalias %src, ptr noalias %dst, i32 %stride) {
+; CHECK-LABEL: define void @dead_dynamically_strided_store(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> zeroinitializer, ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[DST]], i32 [[STRIDE]], i1 false, i32 4, i32 2)
+; CHECK-NEXT: ret void
+;
+entry:
+ call void @llvm.matrix.column.major.store(<8 x double> zeroinitializer, ptr %dst, i32 %stride, i1 false, i32 4, i32 2)
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
+ call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i32 %stride, i1 false, i32 4, i32 2)
+ ret void
+}
+
+declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
+declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32)
diff --git a/llvm/test/Transforms/GVN/matrix-intrinsics.ll b/llvm/test/Transforms/GVN/matrix-intrinsics.ll
new file mode 100644
index 0000000000000..7fd2855e11868
--- /dev/null
+++ b/llvm/test/Transforms/GVN/matrix-intrinsics.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
+
+define <8 x double> @redundant_unstrided_load(ptr %src) {
+; CHECK-LABEL: define <8 x double> @redundant_unstrided_load(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 8
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 4, i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L_2]]
+; CHECK-NEXT: ret <8 x double> [[S]]
+;
+entry:
+  %src.offset = getelementptr inbounds double, ptr %src, i32 8
+  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2)
+  call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 4, i1 false, i32 4, i32 2)
+  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 4, i1 false, i32 4, i32 2)
+  %s = fadd <8 x double> %l, %l.2
+  ret <8 x double> %s
+}
+
+define <8 x double> @redundant_strided_load(ptr %src) {
+; CHECK-LABEL: define <8 x double> @redundant_strided_load(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SRC_OFFSET:%.*]] = getelementptr inbounds double, ptr [[SRC]], i32 16
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> [[L]], ptr [[SRC]], i32 8, i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC_OFFSET]], i32 8, i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L_2]]
+; CHECK-NEXT: ret <8 x double> [[S]]
+;
+entry:
+  %src.offset = getelementptr inbounds double, ptr %src, i32 16
+  %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+  call void @llvm.matrix.column.major.store.v8f64.i32(<8 x double> %l, ptr %src, i32 8, i1 false, i32 4, i32 2)
+  %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src.offset, i32 8, i1 false, i32 4, i32 2)
+  %s = fadd <8 x double> %l, %l.2
+  ret <8 x double> %s
+}
+
+declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
+declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32)
|
0d89928 to
d4db855
Compare
d4db855 to
a85cc0a
Compare
|
Currently blocked on a bug in the IR verifier |
| %s = fadd <8 x double> %l, %l.2 | ||
| ret <8 x double> %s | ||
| } | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
for DSE, we probably also want tests that cover the interaction between matrix intrinsics and normal loads/stores:
- normal store is dead because the last read was from a matrix intrinsic that happened before the store
- normal store is not dead because the last read by a matrix intrinsic happens after the store
- matrix intrinsic store is dead because the last read was from a normal load that happened before the store
- matrix intrinsic store is not dead because the last read from a normal load happens after the store
|
Once I've actioned Jon's suggestion, this should be ready for review once #163729 lands |
a85cc0a to
3611780
Compare
|
Given the change to the objective of #163729, I've updated this PR so that these two changes are now decoupled |
ed18760 to
1eaff3a
Compare
…tore / load intrinsics
1eaff3a to
7de09f4
Compare
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 | ||
| ; RUN: opt -passes=gvn -S %s | FileCheck %s | ||
|
|
||
| define <8 x double> @redundant_unstrided_load(ptr %src) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I might have missed it, but I think it would also be good to add a few tests where we have 2 matrix stores with different number of rows and columns for DSE (i.e 2 stores to the same pointer one storing more than the other, with both possible orderings; one case should eventually be simplified). And something similar for GVN with loads
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks
…or matrix store / load intrinsics (#163573) This patch adds several tests identifying potential opportunities for eliminating dead stores and redundant loads when using the `llvm.matrix.column.major.store.*` and `llvm.matrix.column.major.load.*` intrinsics. PR: llvm/llvm-project#163573
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/154/builds/23027 Here is the relevant piece of the build log for the reference |
This patch adds several tests identifying potential opportunities for eliminating dead stores and redundant loads when using the
llvm.matrix.column.major.store.*andllvm.matrix.column.major.load.*intrinsics.CC @fhahn