Skip to content

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented Feb 25, 2025

No description provided.

Copy link
Contributor Author

arsenm commented Feb 25, 2025

@llvmbot
Copy link
Member

llvmbot commented Feb 25, 2025

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Patch is 23.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/128645.diff

1 Files Affected:

  • (added) llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll (+444)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
new file mode 100644
index 0000000000000..83d9d0d032ed1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
@@ -0,0 +1,444 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < %s | FileCheck %s
+
+; Reads element 0 of a <2 x i16> readfirstlane result. The checks expect the
+; full-width call to be kept, with only the extract index widened to i64.
+define i16 @extract_elt0_v2i16_readfirstlane(<2 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt0_v2i16_readfirstlane(
+; CHECK-SAME: <2 x i16> [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i16> [[VEC]], i64 0
+; CHECK-NEXT:    ret i16 [[ELT]]
+;
+  %vec = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> %src)
+  %elt = extractelement <2 x i16> %vec, i32 0
+  ret i16 %elt
+}
+
+; Single-element vector case: extracts the only element of a <1 x i16>
+; readfirstlane result. The checks expect the vector call to be kept.
+define i16 @extract_elt0_v1i16_readfirstlane(<1 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt0_v1i16_readfirstlane(
+; CHECK-SAME: <1 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <1 x i16> @llvm.amdgcn.readfirstlane.v1i16(<1 x i16> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <1 x i16> [[VEC]], i64 0
+; CHECK-NEXT:    ret i16 [[ELT]]
+;
+  %vec = call <1 x i16> @llvm.amdgcn.readfirstlane.v1i16(<1 x i16> %src)
+  %elt = extractelement <1 x i16> %vec, i32 0
+  ret i16 %elt
+}
+
+; Reads element 1 (the high element) of a <2 x i16> readfirstlane result.
+; The checks expect the full-width call to be kept.
+define i16 @extract_elt1_v2i16_readfirstlane(<2 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt1_v2i16_readfirstlane(
+; CHECK-SAME: <2 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i16> [[VEC]], i64 1
+; CHECK-NEXT:    ret i16 [[ELT]]
+;
+  %vec = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> %src)
+  %elt = extractelement <2 x i16> %vec, i32 1
+  ret i16 %elt
+}
+
+; Reads element 0 of a <4 x i16> readfirstlane result; elements 1-3 are
+; never used. The checks expect the <4 x i16> call to be kept.
+define i16 @extract_elt0_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt0_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <4 x i16> [[VEC]], i64 0
+; CHECK-NEXT:    ret i16 [[ELT]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %elt = extractelement <4 x i16> %vec, i32 0
+  ret i16 %elt
+}
+
+; Reads element 2 of a <4 x i16> readfirstlane result; the other elements
+; are never used. The checks expect the <4 x i16> call to be kept.
+define i16 @extract_elt2_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt2_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <4 x i16> [[VEC]], i64 2
+; CHECK-NEXT:    ret i16 [[ELT]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %elt = extractelement <4 x i16> %vec, i32 2
+  ret i16 %elt
+}
+
+; Extracts the low subvector (elements 0 and 1) of a <4 x i16> readfirstlane
+; result with a shufflevector. The checks expect call and shuffle unchanged.
+define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt01_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+  ret <2 x i16> %shuffle
+}
+
+; Extracts the middle elements 1 and 2 of a <4 x i16> readfirstlane result.
+; The checks expect call and shuffle unchanged.
+define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt12_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 1, i32 2>
+  ret <2 x i16> %shuffle
+}
+
+; Extracts the high subvector (elements 2 and 3) of a <4 x i16> readfirstlane
+; result. The checks expect call and shuffle unchanged.
+define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt23_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+  ret <2 x i16> %shuffle
+}
+
+; Extracts elements 1 and 0 (the low pair, swapped) of a <4 x i16>
+; readfirstlane result. The checks expect call and shuffle unchanged.
+define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt10_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 1, i32 0>
+  ret <2 x i16> %shuffle
+}
+
+; Extracts elements 3 and 2 (the high pair, swapped) of a <4 x i16>
+; readfirstlane result. The checks expect call and shuffle unchanged.
+define <2 x i16> @extract_elt32_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt32_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 2>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 3, i32 2>
+  ret <2 x i16> %shuffle
+}
+
+; Extracts the non-adjacent elements 3 and 0 of a <4 x i16> readfirstlane
+; result. The checks expect call and shuffle unchanged.
+define <2 x i16> @extract_elt30_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt30_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 3, i32 0>
+  ret <2 x i16> %shuffle
+}
+
+; Reads element 0 of a <2 x half> readfirstlane result. The checks expect the
+; full-width call to be kept. The intrinsic is spelled with the .v2f16 suffix
+; that matches its <2 x half> operand, as the CHECK line already expects.
+define half @extract_elt0_v2f16_readfirstlane(<2 x half> %src) {
+; CHECK-LABEL: define half @extract_elt0_v2f16_readfirstlane(
+; CHECK-SAME: <2 x half> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x half> [[VEC]], i64 0
+; CHECK-NEXT:    ret half [[ELT]]
+;
+  %vec = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> %src)
+  %elt = extractelement <2 x half> %vec, i32 0
+  ret half %elt
+}
+
+; Reads element 1 of a <2 x half> readfirstlane result. The checks expect the
+; full-width call to be kept. The intrinsic is spelled with the .v2f16 suffix
+; that matches its <2 x half> operand, as the CHECK line already expects.
+define half @extract_elt1_v2f16_readfirstlane(<2 x half> %src) {
+; CHECK-LABEL: define half @extract_elt1_v2f16_readfirstlane(
+; CHECK-SAME: <2 x half> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x half> [[VEC]], i64 1
+; CHECK-NEXT:    ret half [[ELT]]
+;
+  %vec = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> %src)
+  %elt = extractelement <2 x half> %vec, i32 1
+  ret half %elt
+}
+
+; Don't break on illegal types
+; Reads element 0 of a <4 x i8> readfirstlane result. The intrinsic is
+; spelled with the .v4i8 suffix, as the CHECK line expects.
+define i8 @extract_elt0_v4i8_readfirstlane(<4 x i8> %src) {
+; CHECK-LABEL: define i8 @extract_elt0_v4i8_readfirstlane(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i8> @llvm.amdgcn.readfirstlane.v4i8(<4 x i8> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <4 x i8> [[VEC]], i64 0
+; CHECK-NEXT:    ret i8 [[ELT]]
+;
+  %vec = call <4 x i8> @llvm.amdgcn.readfirstlane.v4i8(<4 x i8> %src)
+  %elt = extractelement <4 x i8> %vec, i32 0
+  ret i8 %elt
+}
+
+; Don't break on illegal types
+; Scalable-vector case: reads element 0 of a <vscale x 2 x i32> readfirstlane
+; result. The checks expect the call to be kept.
+; NOTE(review): the function name says nxv4i32, but the operand type and the
+; intrinsic suffix are nxv2i32 -- confirm which was intended.
+define i32 @extract_elt0_nxv4i32_readfirstlane(<vscale x 2 x i32> %src) {
+; CHECK-LABEL: define i32 @extract_elt0_nxv4i32_readfirstlane(
+; CHECK-SAME: <vscale x 2 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <vscale x 2 x i32> @llvm.amdgcn.readfirstlane.nxv2i32(<vscale x 2 x i32> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <vscale x 2 x i32> [[VEC]], i64 0
+; CHECK-NEXT:    ret i32 [[ELT]]
+;
+  %vec = call <vscale x 2 x i32> @llvm.amdgcn.readfirstlane.nxv2i32(<vscale x 2 x i32> %src)
+  %elt = extractelement <vscale x 2 x i32> %vec, i32 0
+  ret i32 %elt
+}
+
+; Reads element 0 of a <2 x i32> readfirstlane result. The checks expect the
+; full-width call to be kept.
+define i32 @extract_elt0_v2i32_readfirstlane(<2 x i32> %src) {
+; CHECK-LABEL: define i32 @extract_elt0_v2i32_readfirstlane(
+; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i32> [[VEC]], i64 0
+; CHECK-NEXT:    ret i32 [[ELT]]
+;
+  %vec = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src)
+  %elt = extractelement <2 x i32> %vec, i32 0
+  ret i32 %elt
+}
+
+; Pointer-element case: reads element 0 of a <2 x ptr addrspace(3)>
+; readfirstlane result. The checks expect the full-width call to be kept.
+define ptr addrspace(3) @extract_elt0_v2p3_readfirstlane(<2 x ptr addrspace(3)> %src) {
+; CHECK-LABEL: define ptr addrspace(3) @extract_elt0_v2p3_readfirstlane(
+; CHECK-SAME: <2 x ptr addrspace(3)> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x ptr addrspace(3)> @llvm.amdgcn.readfirstlane.v2p3(<2 x ptr addrspace(3)> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x ptr addrspace(3)> [[VEC]], i64 0
+; CHECK-NEXT:    ret ptr addrspace(3) [[ELT]]
+;
+  %vec = call <2 x ptr addrspace(3)> @llvm.amdgcn.readfirstlane.v2p3(<2 x ptr addrspace(3)> %src)
+  %elt = extractelement <2 x ptr addrspace(3)> %vec, i32 0
+  ret ptr addrspace(3) %elt
+}
+
+; 64-bit element case: reads element 0 of a <2 x i64> readfirstlane result.
+; The checks expect the full-width call to be kept.
+define i64 @extract_elt0_v2i64_readfirstlane(<2 x i64> %src) {
+; CHECK-LABEL: define i64 @extract_elt0_v2i64_readfirstlane(
+; CHECK-SAME: <2 x i64> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i64> [[VEC]], i64 0
+; CHECK-NEXT:    ret i64 [[ELT]]
+;
+  %vec = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> %src)
+  %elt = extractelement <2 x i64> %vec, i32 0
+  ret i64 %elt
+}
+
+; 64-bit element case: reads element 1 of a <2 x i64> readfirstlane result.
+; The checks expect the full-width call to be kept.
+define i64 @extract_elt1_v2i64_readfirstlane(<2 x i64> %src) {
+; CHECK-LABEL: define i64 @extract_elt1_v2i64_readfirstlane(
+; CHECK-SAME: <2 x i64> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i64> [[VEC]], i64 1
+; CHECK-NEXT:    ret i64 [[ELT]]
+;
+  %vec = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> %src)
+  %elt = extractelement <2 x i64> %vec, i32 1
+  ret i64 %elt
+}
+
+; Extracts the first three elements (0, 1, 2) of a <4 x i16> readfirstlane
+; result into a <3 x i16>. The checks expect call and shuffle unchanged.
+define <3 x i16> @extract_elt012_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <3 x i16> @extract_elt012_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    ret <3 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  ret <3 x i16> %shuffle
+}
+
+; Extracts the last three elements (1, 2, 3) of a <4 x i16> readfirstlane
+; result into a <3 x i16>. The checks expect call and shuffle unchanged.
+define <3 x i16> @extract_elt123_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <3 x i16> @extract_elt123_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <3 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <3 x i32> <i32 1, i32 2, i32 3>
+  ret <3 x i16> %shuffle
+}
+
+; Extracts the first three elements (0, 1, 2) of a <4 x i32> readfirstlane
+; result into a <3 x i32>. The checks expect call and shuffle unchanged.
+define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) {
+; CHECK-LABEL: define <3 x i32> @extract_elt012_v4i32_readfirstlane(
+; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    ret <3 x i32> [[SHUFFLE]]
+;
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  ret <3 x i32> %shuffle
+}
+
+; Extracts the last three elements (1, 2, 3) of a <4 x i32> readfirstlane
+; result into a <3 x i32>. The checks expect call and shuffle unchanged.
+define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) {
+; CHECK-LABEL: define <3 x i32> @extract_elt123_v4i32_readfirstlane(
+; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <3 x i32> [[SHUFFLE]]
+;
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
+  ret <3 x i32> %shuffle
+}
+
+; Extracts the odd elements (1 and 3) of a <4 x i32> readfirstlane result;
+; elements 0 and 2 are never used. The checks expect call and shuffle unchanged.
+define <2 x i32> @extract_elt13_v4i32_readfirstlane(<4 x i32> %src) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane(
+; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle
+}
+
+; Extracts the odd elements (1 and 3) of a <4 x i16> readfirstlane result;
+; elements 0 and 2 are never used. The checks expect call and shuffle unchanged.
+; NOTE(review): the name lacks the underscore before "readfirstlane" that the
+; other functions in this file use (v4i16readfirstlane vs. v4i16_readfirstlane).
+define <2 x i16> @extract_elt13_v4i16readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt13_v4i16readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i16> %shuffle
+}
+
+; The readfirstlane source is built by inserting %src0 into elements 0 and 1
+; of a poison vector; only elements 1 and 3 of the result are then extracted.
+; The checks show the two inserts canonicalized into an insert plus a splat
+; shuffle, with the <4 x i32> call kept.
+; NOTE(review): %src2 is declared but never used in this function.
+define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0(i32 %src0, i32 %src2) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0
+  %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 1
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle
+}
+
+; The readfirstlane source is built by inserting %src0 into elements 1 and 3
+; of a poison vector, exactly the elements extracted from the result. The
+; checks show the inserts canonicalized into an insert plus a splat shuffle,
+; with the <4 x i32> call kept.
+; NOTE(review): %src2 is declared but never used in this function.
+define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0, i32 %src2) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 1
+  %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 3
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle
+}
+
+; The readfirstlane source is built by inserting %src0 into elements 0 and 2
+; of a poison vector, while only elements 1 and 3 of the result are extracted
+; (so the extracted elements come from poison source elements). The checks
+; show the inserts canonicalized into an insert plus a splat shuffle, with
+; the <4 x i32> call kept.
+; NOTE(review): %src2 is declared but never used in this function.
+define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2(i32 %src0, i32 %src2) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 0, i32 poison>
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0
+  %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 2
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle
+}
+
+; Same shape as the plain <2 x i32> element-0 extract, but in a convergent
+; function where the readfirstlane call carries a "convergencectrl" operand
+; bundle tied to the entry token. The checks expect the call, including its
+; bundle, to be kept.
+define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken(<2 x i32> %src) convergent {
+; CHECK-LABEL: define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken(
+; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[T:%.*]] = call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[SRC]]) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i32> [[VEC]], i64 0
+; CHECK-NEXT:    ret i32 [[ELT]]
+;
+  %t = call token @llvm.experimental.convergence.entry()
+  %vec = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src) [ "convergencectrl"(token %t) ]
+  %elt = extractelement <2 x i32> %vec, i32 0
+  ret i32 %elt
+}
+
+define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) convergent {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[T:%.*]] = call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %t = call token @llvm.experimental.convergence.entry()
+  %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 1
+  %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 3
+  %vec = call <4 ...
[truncated]

@arsenm arsenm marked this pull request as ready for review February 25, 2025 07:44
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Feb 25, 2025
Copy link
Contributor Author

arsenm commented Feb 28, 2025

Merge activity

  • Feb 28, 12:52 AM EST: A user started a stack merge that includes this pull request via Graphite.
  • Feb 28, 12:54 AM EST: Graphite rebased this pull request as part of a merge.
  • Feb 28, 12:57 AM EST: A user merged this pull request with Graphite.

@arsenm arsenm force-pushed the users/arsenm/amdgpu/add-baseline-tests-simplify-demanded-vector-elts-readfirstlane branch from fe33057 to 859a8cb Compare February 28, 2025 05:54
@arsenm arsenm merged commit 2fa6c52 into main Feb 28, 2025
6 of 10 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/add-baseline-tests-simplify-demanded-vector-elts-readfirstlane branch February 28, 2025 05:57
cheezeburglar pushed a commit to cheezeburglar/llvm-project that referenced this pull request Feb 28, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants