Skip to content

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented Nov 28, 2024

No description provided.

Copy link
Contributor Author

arsenm commented Nov 28, 2024

@llvmbot
Copy link
Member

llvmbot commented Nov 28, 2024

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/117962.diff

1 File Affected:

  • (added) llvm/test/Transforms/InstCombine/AMDGPU/lane-index-simplify-demanded-bits.ll (+205)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/lane-index-simplify-demanded-bits.ll b/llvm/test/Transforms/InstCombine/AMDGPU/lane-index-simplify-demanded-bits.ll
new file mode 100644
index 00000000000000..b686f447b8d3c9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/lane-index-simplify-demanded-bits.ll
@@ -0,0 +1,205 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine < %s | FileCheck -check-prefixes=CHECK,WAVE64 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mattr=+wavefrontsize32 -passes=instcombine < %s | FileCheck -check-prefixes=CHECK,WAVE32 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64 -passes=instcombine < %s | FileCheck -check-prefixes=CHECK,WAVE64 %s
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.readlane
+; --------------------------------------------------------------------
+
+define i32 @readlane_31(i32 %arg) #0 {
+; CHECK-LABEL: define i32 @readlane_31(
+; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 31)
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %res = call i32 @llvm.amdgcn.readlane.i32(i32 %arg, i32 31)
+  ret i32 %res
+}
+
+define i32 @readlane_32(i32 %arg) #0 {
+; CHECK-LABEL: define i32 @readlane_32(
+; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 32)
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %res = call i32 @llvm.amdgcn.readlane.i32(i32 %arg, i32 32)
+  ret i32 %res
+}
+
+define i32 @readlane_33(i32 %arg) #0 {
+; CHECK-LABEL: define i32 @readlane_33(
+; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 33)
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %res = call i32 @llvm.amdgcn.readlane.i32(i32 %arg, i32 33)
+  ret i32 %res
+}
+
+define i32 @readlane_63(i32 %arg) #0 {
+; CHECK-LABEL: define i32 @readlane_63(
+; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 63)
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %res = call i32 @llvm.amdgcn.readlane.i32(i32 %arg, i32 63)
+  ret i32 %res
+}
+
+define i32 @readlane_64(i32 %arg) #0 {
+; CHECK-LABEL: define i32 @readlane_64(
+; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 64)
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %res = call i32 @llvm.amdgcn.readlane.i32(i32 %arg, i32 64)
+  ret i32 %res
+}
+
+define i32 @readlane_and_31(i32 %arg, i32 %idx) #0 {
+; CHECK-LABEL: define i32 @readlane_and_31(
+; CHECK-SAME: i32 [[ARG:%.*]], i32 [[IDX:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[IDX_CLAMP:%.*]] = and i32 [[IDX]], 31
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[IDX_CLAMP]])
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %idx.clamp = and i32 %idx, 31
+  %res = call i32 @llvm.amdgcn.readlane.i32(i32 %arg, i32 %idx.clamp)
+  ret i32 %res
+}
+
+define i32 @readlane_and_63(i32 %arg, i32 %idx) #0 {
+; CHECK-LABEL: define i32 @readlane_and_63(
+; CHECK-SAME: i32 [[ARG:%.*]], i32 [[IDX:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[IDX_CLAMP:%.*]] = and i32 [[IDX]], 63
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[IDX_CLAMP]])
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %idx.clamp = and i32 %idx, 63
+  %res = call i32 @llvm.amdgcn.readlane.i32(i32 %arg, i32 %idx.clamp)
+  ret i32 %res
+}
+
+define i32 @readlane_poison(i32 %arg) #0 {
+; CHECK-LABEL: define i32 @readlane_poison(
+; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 poison)
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %res = call i32 @llvm.amdgcn.readlane.i32(i32 %arg, i32 poison)
+  ret i32 %res
+}
+
+define float @readlane_f32_63(float %arg) #0 {
+; CHECK-LABEL: define float @readlane_f32_63(
+; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 63)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.amdgcn.readlane.f32(float %arg, i32 63)
+  ret float %res
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.writelane
+; --------------------------------------------------------------------
+
+define i32 @writelane_31(i32 %arg0, i32 %arg1) #0 {
+; CHECK-LABEL: define i32 @writelane_31(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[ARG0]], i32 31, i32 [[ARG1]])
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %res = call i32 @llvm.amdgcn.writelane.i32(i32 %arg0, i32 31, i32 %arg1)
+  ret i32 %res
+}
+
+define i32 @writelane_32(i32 %arg0, i32 %arg1) #0 {
+; CHECK-LABEL: define i32 @writelane_32(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[ARG0]], i32 32, i32 [[ARG1]])
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %res = call i32 @llvm.amdgcn.writelane.i32(i32 %arg0, i32 32, i32 %arg1)
+  ret i32 %res
+}
+
+define i32 @writelane_33(i32 %arg0, i32 %arg1) #0 {
+; CHECK-LABEL: define i32 @writelane_33(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[ARG0]], i32 33, i32 [[ARG1]])
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %res = call i32 @llvm.amdgcn.writelane.i32(i32 %arg0, i32 33, i32 %arg1)
+  ret i32 %res
+}
+
+define i32 @writelane_63(i32 %arg0, i32 %arg1) #0 {
+; CHECK-LABEL: define i32 @writelane_63(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[ARG0]], i32 63, i32 [[ARG1]])
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %res = call i32 @llvm.amdgcn.writelane.i32(i32 %arg0, i32 63, i32 %arg1)
+  ret i32 %res
+}
+
+define i32 @writelane_64(i32 %arg0, i32 %arg1) #0 {
+; CHECK-LABEL: define i32 @writelane_64(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[ARG0]], i32 64, i32 [[ARG1]])
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %res = call i32 @llvm.amdgcn.writelane.i32(i32 %arg0, i32 64, i32 %arg1)
+  ret i32 %res
+}
+
+define i32 @writelane_and_31(i32 %arg0, i32 %arg1, i32 %idx) #0 {
+; CHECK-LABEL: define i32 @writelane_and_31(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[IDX:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[IDX_CLAMP:%.*]] = and i32 [[IDX]], 31
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[ARG0]], i32 [[IDX_CLAMP]], i32 [[ARG1]])
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %idx.clamp = and i32 %idx, 31
+  %res = call i32 @llvm.amdgcn.writelane.i32(i32 %arg0, i32 %idx.clamp, i32 %arg1)
+  ret i32 %res
+}
+
+define i32 @writelane_and_63(i32 %arg0, i32 %arg1, i32 %idx) #0 {
+; CHECK-LABEL: define i32 @writelane_and_63(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[IDX:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[IDX_CLAMP:%.*]] = and i32 [[IDX]], 63
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[ARG0]], i32 [[IDX_CLAMP]], i32 [[ARG1]])
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %idx.clamp = and i32 %idx, 63
+  %res = call i32 @llvm.amdgcn.writelane.i32(i32 %arg0, i32 %idx.clamp, i32 %arg1)
+  ret i32 %res
+}
+
+define i32 @writelane_poison(i32 %arg0, i32 %arg1) #0 {
+; CHECK-LABEL: define i32 @writelane_poison(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[ARG0]], i32 poison, i32 [[ARG1]])
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %res = call i32 @llvm.amdgcn.writelane.i32(i32 %arg0, i32 poison, i32 %arg1)
+  ret i32 %res
+}
+
+define float @writelane_f32_63(float %arg0, float %arg1) #0 {
+; CHECK-LABEL: define float @writelane_f32_63(
+; CHECK-SAME: float [[ARG0:%.*]], float [[ARG1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.amdgcn.writelane.f32(float [[ARG0]], i32 63, float [[ARG1]])
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.amdgcn.writelane.f32(float %arg0, i32 63, float %arg1)
+  ret float %res
+}
+
+attributes #0 = { nounwind }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; WAVE32: {{.*}}
+; WAVE64: {{.*}}

@arsenm arsenm marked this pull request as ready for review November 28, 2024 03:54
@llvmbot llvmbot added the llvm:instcombine (Covers the InstCombine, InstSimplify and AggressiveInstCombine passes) and llvm:transforms labels Nov 28, 2024
@arsenm arsenm force-pushed the users/arsenm/amdgpu-baseline-test-lane-index-simplify branch from 3363529 to bd8f0b9 Compare December 2, 2024 13:35
Copy link
Contributor Author

arsenm commented Dec 2, 2024

Merge activity

  • Dec 2, 2:49 PM EST: A user started a stack merge that includes this pull request via Graphite.
  • Dec 2, 2:50 PM EST: A user merged this pull request with Graphite.

@arsenm arsenm merged commit 681bd84 into main Dec 2, 2024
8 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu-baseline-test-lane-index-simplify branch December 2, 2024 19:50
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU, llvm:instcombine (Covers the InstCombine, InstSimplify and AggressiveInstCombine passes), llvm:transforms

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants