Skip to content

Commit 78281fd

Browse files
committed
Revert "[AMDGPU] InstCombine llvm.amdgcn.ds.bpermute with uniform arguments (#129895)"
This reverts commit be5149a. It caused build failures in the openmp-offload-amdgpu-runtime buildbot and others.
1 parent 9189d84 commit 78281fd

File tree

2 files changed

+3
-60
lines changed

2 files changed

+3
-60
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,11 +1118,9 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
11181118
}
11191119
case Intrinsic::amdgcn_permlane64:
11201120
case Intrinsic::amdgcn_readfirstlane:
1121-
case Intrinsic::amdgcn_readlane:
1122-
case Intrinsic::amdgcn_ds_bpermute: {
1123-
// If the data argument is uniform these intrinsics return it unchanged.
1124-
unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
1125-
const Use &Src = II.getArgOperandUse(SrcIdx);
1121+
case Intrinsic::amdgcn_readlane: {
1122+
// If the first argument is uniform these intrinsics return it unchanged.
1123+
const Use &Src = II.getArgOperandUse(0);
11261124
if (isTriviallyUniform(Src))
11271125
return IC.replaceInstUsesWith(II, Src.get());
11281126

@@ -1154,22 +1152,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
11541152
}
11551153
}
11561154

1157-
// If the lane argument of bpermute is uniform, change it to readlane. This
1158-
// generates better code and can enable further optimizations because
1159-
// readlane is AlwaysUniform.
1160-
if (IID == Intrinsic::amdgcn_ds_bpermute) {
1161-
const Use &Lane = II.getArgOperandUse(0);
1162-
if (isTriviallyUniform(Lane)) {
1163-
Value *NewLane = IC.Builder.CreateLShr(Lane, 2);
1164-
Function *NewDecl = Intrinsic::getOrInsertDeclaration(
1165-
II.getModule(), Intrinsic::amdgcn_readlane, II.getType());
1166-
II.setCalledFunction(NewDecl);
1167-
II.setOperand(0, Src);
1168-
II.setOperand(1, NewLane);
1169-
return &II;
1170-
}
1171-
}
1172-
11731155
return std::nullopt;
11741156
}
11751157
case Intrinsic::amdgcn_writelane: {

llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -6583,42 +6583,3 @@ define i32 @prng_poison_i32() {
65836583
%prng = call i32 @llvm.amdgcn.prng.b32(i32 poison)
65846584
ret i32 %prng
65856585
}
6586-
6587-
; --------------------------------------------------------------------
6588-
; llvm.amdgcn.ds.bpermute
6589-
; --------------------------------------------------------------------
6590-
6591-
define amdgpu_kernel void @ds_bpermute_uniform_src(ptr addrspace(1) %out, i32 %lane) {
6592-
; CHECK-LABEL: @ds_bpermute_uniform_src(
6593-
; CHECK-NEXT: store i32 7, ptr addrspace(1) [[OUT:%.*]], align 4
6594-
; CHECK-NEXT: ret void
6595-
;
6596-
%v = call i32 @llvm.amdgcn.ds.bpermute(i32 %lane, i32 7)
6597-
store i32 %v, ptr addrspace(1) %out
6598-
ret void
6599-
}
6600-
6601-
define amdgpu_kernel void @ds_bpermute_constant_lane(ptr addrspace(1) %out, i32 %src) {
6602-
; CHECK-LABEL: @ds_bpermute_constant_lane(
6603-
; CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[SRC:%.*]], i32 7)
6604-
; CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT:%.*]], align 4
6605-
; CHECK-NEXT: ret void
6606-
;
6607-
%v = call i32 @llvm.amdgcn.ds.bpermute(i32 28, i32 %src)
6608-
store i32 %v, ptr addrspace(1) %out
6609-
ret void
6610-
}
6611-
6612-
define amdgpu_kernel void @ds_bpermute_uniform_lane(ptr addrspace(1) %out, i32 %lanearg, i32 %src) {
6613-
; CHECK-LABEL: @ds_bpermute_uniform_lane(
6614-
; CHECK-NEXT: [[LANE:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[LANEARG:%.*]])
6615-
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[LANE]], 2
6616-
; CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[SRC:%.*]], i32 [[TMP1]])
6617-
; CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT:%.*]], align 4
6618-
; CHECK-NEXT: ret void
6619-
;
6620-
%lane = call i32 @llvm.amdgcn.readfirstlane(i32 %lanearg)
6621-
%v = call i32 @llvm.amdgcn.ds.bpermute(i32 %lane, i32 %src)
6622-
store i32 %v, ptr addrspace(1) %out
6623-
ret void
6624-
}

0 commit comments

Comments
 (0)