Skip to content

Commit ad6c4d9

Browse files
committed
Using CallInst directly actually works
1 parent 2b4c590 commit ad6c4d9

File tree

3 files changed

+13
-12
lines changed

3 files changed

+13
-12
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,18 +1134,19 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
11341134

11351135
// TODO: Handle this for update_dpp, mov_dpp8, and all permlane variants.
11361136
if (isTypeLegal(BCSrc->getType())) {
1137+
Module *M = IC.Builder.GetInsertBlock()->getModule();
1138+
Function *Remangled =
1139+
Intrinsic::getOrInsertDeclaration(M, IID, {BCSrc->getType()});
1140+
11371141
// Make sure convergence tokens are preserved.
11381142
// TODO: CreateIntrinsic should allow directly copying bundles
11391143
SmallVector<OperandBundleDef, 2> OpBundles;
11401144
II.getOperandBundlesAsDefs(OpBundles);
11411145

1142-
IRBuilderBase::OperandBundlesGuard Guard(IC.Builder);
1143-
IC.Builder.setDefaultOperandBundles(OpBundles);
1144-
11451146
SmallVector<Value *, 3> Args(II.args());
11461147
Args[0] = BCSrc;
1147-
CallInst *NewCall = IC.Builder.CreateIntrinsic(
1148-
II.getIntrinsicID(), {BCSrc->getType()}, Args);
1148+
1149+
CallInst *NewCall = IC.Builder.CreateCall(Remangled, Args, OpBundles);
11491150
NewCall->takeName(&II);
11501151
return new BitCastInst(NewCall, II.getType());
11511152
}

llvm/test/Transforms/InstCombine/AMDGPU/bitcast-fold-lane-ops.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -288,8 +288,8 @@ define i32 @test_bitcast_f32_to_i32_readfirstlane_convergencetoken(float %val) c
288288
; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_readfirstlane_convergencetoken(
289289
; CHECK-SAME: float [[VAL:%.*]]) #[[ATTR1]] {
290290
; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry()
291-
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
292-
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]]) [ "convergencectrl"(token [[T]]) ]
291+
; CHECK-NEXT: [[RESULT1:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[VAL]]) [ "convergencectrl"(token [[T]]) ]
292+
; CHECK-NEXT: [[RESULT:%.*]] = bitcast float [[RESULT1]] to i32
293293
; CHECK-NEXT: ret i32 [[RESULT]]
294294
;
295295
%t = call token @llvm.experimental.convergence.entry()
@@ -302,8 +302,8 @@ define i32 @test_bitcast_f32_to_i32_readlane_convergencetoken(float %val, i32 in
302302
; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_readlane_convergencetoken(
303303
; CHECK-SAME: float [[VAL:%.*]], i32 inreg [[LANE_INDEX:%.*]]) #[[ATTR1]] {
304304
; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry()
305-
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
306-
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[BITCAST]], i32 [[LANE_INDEX]]) [ "convergencectrl"(token [[T]]) ]
305+
; CHECK-NEXT: [[RESULT1:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL]], i32 [[LANE_INDEX]]) [ "convergencectrl"(token [[T]]) ]
306+
; CHECK-NEXT: [[RESULT:%.*]] = bitcast float [[RESULT1]] to i32
307307
; CHECK-NEXT: ret i32 [[RESULT]]
308308
;
309309
%t = call token @llvm.experimental.convergence.entry()

llvm/test/Transforms/InstCombine/AMDGPU/permlane64.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ define i32 @test_bitcast_f32_to_i32_permlane64(float %val) {
2424
define i32 @test_bitcast_f32_to_i32_permlane64_convergencetokenn(float %val) convergent {
2525
; CHECK-LABEL: @test_bitcast_f32_to_i32_permlane64_convergencetokenn(
2626
; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry()
27-
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float [[VAL:%.*]] to i32
28-
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.permlane64.i32(i32 [[BITCAST]]) [ "convergencectrl"(token [[T]]) ]
29-
; CHECK-NEXT: ret i32 [[RESULT]]
27+
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.permlane64.f32(float [[VAL1:%.*]]) [ "convergencectrl"(token [[T]]) ]
28+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
29+
; CHECK-NEXT: ret i32 [[BITCAST]]
3030
;
3131
%t = call token @llvm.experimental.convergence.entry()
3232
%bitcast = bitcast float %val to i32

0 commit comments

Comments
 (0)