Skip to content

Commit c2256b0

Browse files
committed
address comments
1 parent 52b765b commit c2256b0

File tree

3 files changed

+30
-16
lines changed

3 files changed

+30
-16
lines changed

llvm/docs/NVPTXUsage.rst

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ Syntax:
330330

331331
.. code-block:: llvm
332332
333-
declare i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 %c)
333+
declare i32 @llvm.nvvm.fshl.clamp.i32(i32 %hi, i32 %lo, i32 %n)
334334
335335
Overview:
336336
"""""""""
@@ -344,11 +344,11 @@ Semantics:
344344
""""""""""
345345

346346
The '``llvm.nvvm.fshl.clamp``' family of intrinsic functions performs a clamped
347-
funnel shift left: the first two values are concatenated as { %a : %b } (%a is
348-
the most significant bits of the wide value), the combined value is shifted
347+
funnel shift left: the first two values are concatenated as { %hi : %lo } (%hi
348+
holds the most significant bits of the wide value), the combined value is shifted
349349
left, and the most significant bits are extracted to produce a result that is
350350
the same size as the original arguments. The shift amount is the minimum of the
351-
value of %c and the bit width of the integer type.
351+
value of %n and the bit width of the integer type.
352352

353353
'``llvm.nvvm.fshr.clamp.*``' Intrinsic
354354
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -358,7 +358,7 @@ Syntax:
358358

359359
.. code-block:: llvm
360360
361-
declare i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 %c)
361+
declare i32 @llvm.nvvm.fshr.clamp.i32(i32 %hi, i32 %lo, i32 %n)
362362
363363
Overview:
364364
"""""""""
@@ -372,11 +372,11 @@ Semantics:
372372
""""""""""
373373

374374
The '``llvm.nvvm.fshr.clamp``' family of intrinsic functions performs a clamped
375-
funnel shift right: the first two values are concatenated as { %a : %b } (%a is
376-
the most significant bits of the wide value), the combined value is shifted
375+
funnel shift right: the first two values are concatenated as { %hi : %lo } (%hi
376+
holds the most significant bits of the wide value), the combined value is shifted
377377
right, and the least significant bits are extracted to produce a result that is
378378
the same size as the original arguments. The shift amount is the minimum of the
379-
value of %c and the bit width of the integer type.
379+
value of %n and the bit width of the integer type.
380380

381381

382382
Other Intrinsics

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -395,13 +395,13 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
395395
II->getArgOperand(0), II->getName());
396396

397397
case SCP_FunnelShiftClamp: {
398-
// Canoncialize a clamping funnel shift to the generic llvm funnel shift
398+
// Canonicalize a clamping funnel shift to the generic LLVM funnel shift
399399
// when possible, as this is easier for LLVM to optimize further.
400400
if (const auto *ShiftConst = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
401+
const bool IsLeft = II->getIntrinsicID() == Intrinsic::nvvm_fshl_clamp;
401402
if (ShiftConst->getZExtValue() >= II->getType()->getIntegerBitWidth())
402-
return IC.replaceInstUsesWith(*II, II->getArgOperand(1));
403+
return IC.replaceInstUsesWith(*II, II->getArgOperand(IsLeft ? 1 : 0));
403404

404-
const bool IsLeft = II->getIntrinsicID() == Intrinsic::nvvm_fshl_clamp;
405405
const unsigned FshIID = IsLeft ? Intrinsic::fshl : Intrinsic::fshr;
406406
return CallInst::Create(Intrinsic::getOrInsertDeclaration(
407407
II->getModule(), FshIID, II->getType()),

llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -385,33 +385,47 @@ define float @test_sqrt_rn_f_ftz(float %a) #0 {
385385
}
386386

387387
; CHECK-LABEL: @test_fshl_clamp_1
388-
define i32 @test_fshl_clamp_1(i32 %a, i32 %b, i32 %c) {
388+
define i32 @test_fshl_clamp_1(i32 %a, i32 %b) {
389389
; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 3)
390390
%call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 3)
391391
ret i32 %call
392392
}
393393

394394
; CHECK-LABEL: @test_fshl_clamp_2
395-
define i32 @test_fshl_clamp_2(i32 %a, i32 %b, i32 %c) {
395+
define i32 @test_fshl_clamp_2(i32 %a, i32 %b) {
396396
; CHECK: ret i32 %b
397397
%call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 300)
398398
ret i32 %call
399399
}
400400

401+
; CHECK-LABEL: @test_fshl_clamp_3
402+
define i32 @test_fshl_clamp_3(i32 %a, i32 %b, i32 %c) {
403+
; CHECK: call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 %c)
404+
%call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 %c)
405+
ret i32 %call
406+
}
407+
401408
; CHECK-LABEL: @test_fshr_clamp_1
402-
define i32 @test_fshr_clamp_1(i32 %a, i32 %b, i32 %c) {
409+
define i32 @test_fshr_clamp_1(i32 %a, i32 %b) {
403410
; InstCombine rewrites fshr by a constant as fshl by (bitwidth - amount): 32 - 3 = 29.
; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 29)
404411
%call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 3)
405412
ret i32 %call
406413
}
407414

408415
; CHECK-LABEL: @test_fshr_clamp_2
409-
define i32 @test_fshr_clamp_2(i32 %a, i32 %b, i32 %c) {
410-
; CHECK: ret i32 %b
416+
define i32 @test_fshr_clamp_2(i32 %a, i32 %b) {
417+
; CHECK: ret i32 %a
411418
%call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 300)
412419
ret i32 %call
413420
}
414421

422+
; CHECK-LABEL: @test_fshr_clamp_3
423+
define i32 @test_fshr_clamp_3(i32 %a, i32 %b, i32 %c) {
424+
; CHECK: call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 %c)
425+
%call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 %c)
426+
ret i32 %call
427+
}
428+
415429
declare double @llvm.nvvm.add.rn.d(double, double)
416430
declare float @llvm.nvvm.add.rn.f(float, float)
417431
declare float @llvm.nvvm.add.rn.ftz.f(float, float)

0 commit comments

Comments
 (0)