Skip to content

Commit 07c7d9b

Browse files
committed
address feedback
1 parent 2f4f1da commit 07c7d9b

File tree

4 files changed

+46
-40
lines changed

4 files changed

+46
-40
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5296,11 +5296,32 @@ SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N,
52965296
return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0));
52975297
}
52985298

5299-
bool AMDGPUTargetLowering::canMov64bImm(uint64_t Val, SelectionDAG &DAG) const {
5299+
bool AMDGPUTargetLowering::isInt64ImmLegal(SDNode *N, SelectionDAG &DAG) const {
53005300
if (!Subtarget->isGCN())
53015301
return false;
5302+
5303+
ConstantSDNode *SDConstant = dyn_cast<ConstantSDNode>(N);
5304+
ConstantFPSDNode *SDFPConstant = dyn_cast<ConstantFPSDNode>(N);
53025305
auto &ST = DAG.getSubtarget<GCNSubtarget>();
5303-
return ST.hasMovB64() && (ST.has64BitLiterals() || isUInt<32>(Val));
5306+
bool isInlineable = false;
5307+
const auto *TII = ST.getInstrInfo();
5308+
5309+
if (!SDConstant && !SDFPConstant)
5310+
return false;
5311+
5312+
uint64_t Val = 0;
5313+
if (SDConstant) {
5314+
const APInt &APVal = SDConstant->getAPIntValue();
5315+
isInlineable = TII->isInlineConstant(APVal);
5316+
Val = APVal.getZExtValue();
5317+
} else if (SDFPConstant) {
5318+
const APFloat &APVal = SDFPConstant->getValueAPF();
5319+
isInlineable = TII->isInlineConstant(APVal);
5320+
Val = APVal.bitcastToAPInt().getZExtValue();
5321+
}
5322+
5323+
return ST.hasMovB64() &&
5324+
(ST.has64BitLiterals() || isUInt<32>(Val) || isInlineable);
53045325
}
53055326

53065327
SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
@@ -5352,9 +5373,9 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
53525373
SDValue Src = N->getOperand(0);
53535374
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src)) {
53545375
SDLoc SL(N);
5355-
uint64_t CVal = C->getZExtValue();
5356-
if (canMov64bImm(CVal, DAG))
5376+
if (isInt64ImmLegal(C, DAG))
53575377
break;
5378+
uint64_t CVal = C->getZExtValue();
53585379
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
53595380
DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
53605381
DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
@@ -5364,9 +5385,9 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
53645385
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Src)) {
53655386
const APInt &Val = C->getValueAPF().bitcastToAPInt();
53665387
SDLoc SL(N);
5367-
uint64_t CVal = Val.getZExtValue();
5368-
if (canMov64bImm(CVal, DAG))
5388+
if (isInt64ImmLegal(C, DAG))
53695389
break;
5390+
uint64_t CVal = Val.getZExtValue();
53705391
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
53715392
DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
53725393
DAG.getConstant(Hi_32(CVal), SL, MVT::i32));

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ class AMDGPUTargetLowering : public TargetLowering {
105105
protected:
106106
/// Check whether value Val can be supported by v_mov_b64, for the current
107107
/// target.
108-
bool canMov64bImm(uint64_t Val, SelectionDAG &DAG) const;
108+
bool isInt64ImmLegal(SDNode *Val, SelectionDAG &DAG) const;
109109
bool shouldCombineMemoryType(EVT VT) const;
110110
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
111111
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14591,23 +14591,20 @@ SITargetLowering::performExtractVectorEltCombine(SDNode *N,
1459114591
auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
1459214592
if (Vec.getOpcode() == ISD::BITCAST && VecVT == MVT::v2i32 && Idx) {
1459314593
SDLoc SL(N);
14594-
SDValue PeekThrough = peekThroughBitcasts(Vec);
14594+
SDValue PeekThrough = Vec.getOperand(0);
1459514595
auto *KImm = dyn_cast<ConstantSDNode>(PeekThrough);
1459614596
if (KImm && KImm->getValueType(0).getSizeInBits() == 64) {
1459714597
uint64_t KImmValue = KImm->getZExtValue();
14598-
if (Idx->getZExtValue() == 0)
14599-
return DAG.getConstant(Lo_32(KImmValue), SL, MVT::i32);
14600-
else
14601-
return DAG.getConstant(Hi_32(KImmValue), SL, MVT::i32);
14598+
return DAG.getConstant(
14599+
(KImmValue >> (32 * Idx->getZExtValue())) & 0xffffffff, SL, MVT::i32);
1460214600
}
1460314601
auto *KFPImm = dyn_cast<ConstantFPSDNode>(PeekThrough);
1460414602
if (KFPImm && KFPImm->getValueType(0).getSizeInBits() == 64) {
1460514603
uint64_t KFPImmValue =
1460614604
KFPImm->getValueAPF().bitcastToAPInt().getZExtValue();
14607-
if (Idx->getZExtValue() == 0)
14608-
return DAG.getConstant(Lo_32(KFPImmValue), SL, MVT::i32);
14609-
else
14610-
return DAG.getConstant(Hi_32(KFPImmValue), SL, MVT::i32);
14605+
return DAG.getConstant((KFPImmValue >> (32 * Idx->getZExtValue())) &
14606+
0xffffffff,
14607+
SL, MVT::i32);
1461114608
}
1461214609
}
1461314610

llvm/test/CodeGen/AMDGPU/imm.ll

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1969,10 +1969,9 @@ define amdgpu_kernel void @add_inline_imm_neg_1_f64(ptr addrspace(1) %out, [8 x
19691969
; GFX942-LABEL: add_inline_imm_neg_1_f64:
19701970
; GFX942: ; %bb.0:
19711971
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1972-
; GFX942-NEXT: v_mov_b32_e32 v0, -1
19731972
; GFX942-NEXT: s_mov_b32 s3, 0xf000
19741973
; GFX942-NEXT: s_mov_b32 s2, -1
1975-
; GFX942-NEXT: v_mov_b32_e32 v1, v0
1974+
; GFX942-NEXT: v_mov_b64_e32 v[0:1], -1
19761975
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
19771976
; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
19781977
; GFX942-NEXT: s_endpgm
@@ -2009,8 +2008,7 @@ define amdgpu_kernel void @add_inline_imm_neg_2_f64(ptr addrspace(1) %out, [8 x
20092008
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
20102009
; GFX942-NEXT: s_mov_b32 s3, 0xf000
20112010
; GFX942-NEXT: s_mov_b32 s2, -1
2012-
; GFX942-NEXT: v_mov_b32_e32 v0, -2
2013-
; GFX942-NEXT: v_mov_b32_e32 v1, -1
2011+
; GFX942-NEXT: v_mov_b64_e32 v[0:1], -2
20142012
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
20152013
; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
20162014
; GFX942-NEXT: s_endpgm
@@ -2047,8 +2045,7 @@ define amdgpu_kernel void @add_inline_imm_neg_16_f64(ptr addrspace(1) %out, [8 x
20472045
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
20482046
; GFX942-NEXT: s_mov_b32 s3, 0xf000
20492047
; GFX942-NEXT: s_mov_b32 s2, -1
2050-
; GFX942-NEXT: v_mov_b32_e32 v0, -16
2051-
; GFX942-NEXT: v_mov_b32_e32 v1, -1
2048+
; GFX942-NEXT: v_mov_b64_e32 v[0:1], -16
20522049
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
20532050
; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
20542051
; GFX942-NEXT: s_endpgm
@@ -2238,8 +2235,7 @@ define amdgpu_kernel void @store_inline_imm_0.5_f64(ptr addrspace(1) %out) {
22382235
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
22392236
; GFX942-NEXT: s_mov_b32 s3, 0xf000
22402237
; GFX942-NEXT: s_mov_b32 s2, -1
2241-
; GFX942-NEXT: v_mov_b32_e32 v0, 0
2242-
; GFX942-NEXT: v_mov_b32_e32 v1, 0x3fe00000
2238+
; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0.5
22432239
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
22442240
; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
22452241
; GFX942-NEXT: s_endpgm
@@ -2275,8 +2271,7 @@ define amdgpu_kernel void @store_inline_imm_m_0.5_f64(ptr addrspace(1) %out) {
22752271
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
22762272
; GFX942-NEXT: s_mov_b32 s3, 0xf000
22772273
; GFX942-NEXT: s_mov_b32 s2, -1
2278-
; GFX942-NEXT: v_mov_b32_e32 v0, 0
2279-
; GFX942-NEXT: v_mov_b32_e32 v1, 0xbfe00000
2274+
; GFX942-NEXT: v_mov_b64_e32 v[0:1], -0.5
22802275
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
22812276
; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
22822277
; GFX942-NEXT: s_endpgm
@@ -2312,8 +2307,7 @@ define amdgpu_kernel void @store_inline_imm_1.0_f64(ptr addrspace(1) %out) {
23122307
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
23132308
; GFX942-NEXT: s_mov_b32 s3, 0xf000
23142309
; GFX942-NEXT: s_mov_b32 s2, -1
2315-
; GFX942-NEXT: v_mov_b32_e32 v0, 0
2316-
; GFX942-NEXT: v_mov_b32_e32 v1, 0x3ff00000
2310+
; GFX942-NEXT: v_mov_b64_e32 v[0:1], 1.0
23172311
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
23182312
; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
23192313
; GFX942-NEXT: s_endpgm
@@ -2349,8 +2343,7 @@ define amdgpu_kernel void @store_inline_imm_m_1.0_f64(ptr addrspace(1) %out) {
23492343
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
23502344
; GFX942-NEXT: s_mov_b32 s3, 0xf000
23512345
; GFX942-NEXT: s_mov_b32 s2, -1
2352-
; GFX942-NEXT: v_mov_b32_e32 v0, 0
2353-
; GFX942-NEXT: v_mov_b32_e32 v1, 0xbff00000
2346+
; GFX942-NEXT: v_mov_b64_e32 v[0:1], -1.0
23542347
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
23552348
; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
23562349
; GFX942-NEXT: s_endpgm
@@ -2386,8 +2379,7 @@ define amdgpu_kernel void @store_inline_imm_2.0_f64(ptr addrspace(1) %out) {
23862379
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
23872380
; GFX942-NEXT: s_mov_b32 s3, 0xf000
23882381
; GFX942-NEXT: s_mov_b32 s2, -1
2389-
; GFX942-NEXT: v_mov_b32_e32 v0, 0
2390-
; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
2382+
; GFX942-NEXT: v_mov_b64_e32 v[0:1], 2.0
23912383
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
23922384
; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
23932385
; GFX942-NEXT: s_endpgm
@@ -2423,8 +2415,7 @@ define amdgpu_kernel void @store_inline_imm_m_2.0_f64(ptr addrspace(1) %out) {
24232415
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
24242416
; GFX942-NEXT: s_mov_b32 s3, 0xf000
24252417
; GFX942-NEXT: s_mov_b32 s2, -1
2426-
; GFX942-NEXT: v_mov_b32_e32 v0, 0
2427-
; GFX942-NEXT: v_mov_b32_e32 v1, -2.0
2418+
; GFX942-NEXT: v_mov_b64_e32 v[0:1], -2.0
24282419
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
24292420
; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
24302421
; GFX942-NEXT: s_endpgm
@@ -2460,8 +2451,7 @@ define amdgpu_kernel void @store_inline_imm_4.0_f64(ptr addrspace(1) %out) {
24602451
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
24612452
; GFX942-NEXT: s_mov_b32 s3, 0xf000
24622453
; GFX942-NEXT: s_mov_b32 s2, -1
2463-
; GFX942-NEXT: v_mov_b32_e32 v0, 0
2464-
; GFX942-NEXT: v_mov_b32_e32 v1, 0x40100000
2454+
; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0
24652455
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
24662456
; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
24672457
; GFX942-NEXT: s_endpgm
@@ -2497,8 +2487,7 @@ define amdgpu_kernel void @store_inline_imm_m_4.0_f64(ptr addrspace(1) %out) {
24972487
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
24982488
; GFX942-NEXT: s_mov_b32 s3, 0xf000
24992489
; GFX942-NEXT: s_mov_b32 s2, -1
2500-
; GFX942-NEXT: v_mov_b32_e32 v0, 0
2501-
; GFX942-NEXT: v_mov_b32_e32 v1, 0xc0100000
2490+
; GFX942-NEXT: v_mov_b64_e32 v[0:1], -4.0
25022491
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
25032492
; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
25042493
; GFX942-NEXT: s_endpgm
@@ -2534,8 +2523,7 @@ define amdgpu_kernel void @store_inv_2pi_f64(ptr addrspace(1) %out) {
25342523
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
25352524
; GFX942-NEXT: s_mov_b32 s3, 0xf000
25362525
; GFX942-NEXT: s_mov_b32 s2, -1
2537-
; GFX942-NEXT: v_mov_b32_e32 v0, 0x6dc9c882
2538-
; GFX942-NEXT: v_mov_b32_e32 v1, 0x3fc45f30
2526+
; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0.15915494309189532
25392527
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
25402528
; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
25412529
; GFX942-NEXT: s_endpgm

0 commit comments

Comments
 (0)