Skip to content

Commit f3c3a66

Browse files
committed
AMDGPU: Consider isVGPRImm when forming constant from build_vector
The build_vector probably should have been turned into a regular integer constant earlier; this change is to defend against future regressions.
1 parent d8f6e10 commit f3c3a66

File tree

4 files changed

+44
-39
lines changed

4 files changed

+44
-39
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,24 @@ MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
470470
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
471471
}
472472

473+
SDNode *AMDGPUDAGToDAGISel::packConstantV2I16(const SDNode *N,
474+
SelectionDAG &DAG) const {
475+
// TODO: Handle undef as zero
476+
477+
assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
478+
uint32_t LHSVal, RHSVal;
479+
if (getConstantValue(N->getOperand(0), LHSVal) &&
480+
getConstantValue(N->getOperand(1), RHSVal)) {
481+
SDLoc SL(N);
482+
uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16);
483+
return DAG.getMachineNode(
484+
isVGPRImm(N) ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32, SL,
485+
N->getValueType(0), DAG.getTargetConstant(K, SL, MVT::i32));
486+
}
487+
488+
return nullptr;
489+
}
490+
473491
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
474492
EVT VT = N->getValueType(0);
475493
unsigned NumVectorElts = VT.getVectorNumElements();

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -45,21 +45,6 @@ static inline bool getConstantValue(SDValue N, uint32_t &Out) {
4545
return false;
4646
}
4747

48-
// TODO: Handle undef as zero
49-
static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
50-
assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
51-
uint32_t LHSVal, RHSVal;
52-
if (getConstantValue(N->getOperand(0), LHSVal) &&
53-
getConstantValue(N->getOperand(1), RHSVal)) {
54-
SDLoc SL(N);
55-
uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16);
56-
return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
57-
DAG.getTargetConstant(K, SL, MVT::i32));
58-
}
59-
60-
return nullptr;
61-
}
62-
6348
/// AMDGPU specific code to select AMDGPU machine instructions for
6449
/// SelectionDAG operations.
6550
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
@@ -115,6 +100,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
115100

116101
MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
117102

103+
SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG) const;
104+
118105
SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
119106
SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
120107
SDNode *glueCopyToM0LDSInit(SDNode *N) const;

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -497,10 +497,10 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(ptr %p_node_
497497
; GFX1030: ; %bb.0: ; %main_body
498498
; GFX1030-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
499499
; GFX1030-NEXT: v_lshlrev_b32_e32 v2, 2, v0
500-
; GFX1030-NEXT: v_mov_b32_e32 v4, 2.0
501-
; GFX1030-NEXT: v_mov_b32_e32 v5, 0x44004200
502-
; GFX1030-NEXT: v_mov_b32_e32 v6, 0x46004500
503500
; GFX1030-NEXT: v_mov_b32_e32 v7, 0x48004700
501+
; GFX1030-NEXT: v_mov_b32_e32 v6, 0x46004500
502+
; GFX1030-NEXT: v_mov_b32_e32 v5, 0x44004200
503+
; GFX1030-NEXT: v_mov_b32_e32 v4, 2.0
504504
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
505505
; GFX1030-NEXT: v_add_co_u32 v0, s0, s0, v2
506506
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
@@ -810,11 +810,11 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray
810810
; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
811811
; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 2, v0
812812
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
813+
; GFX1030-NEXT: v_mov_b32_e32 v8, 0x48004700
814+
; GFX1030-NEXT: v_mov_b32_e32 v7, 0x46004500
815+
; GFX1030-NEXT: v_mov_b32_e32 v6, 0x44004200
813816
; GFX1030-NEXT: v_mov_b32_e32 v5, 2.0
814817
; GFX1030-NEXT: v_mov_b32_e32 v4, 1.0
815-
; GFX1030-NEXT: v_mov_b32_e32 v6, 0x44004200
816-
; GFX1030-NEXT: v_mov_b32_e32 v7, 0x46004500
817-
; GFX1030-NEXT: v_mov_b32_e32 v8, 0x48004700
818818
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
819819
; GFX1030-NEXT: v_add_co_u32 v0, s4, s6, v0
820820
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s7, 0, s4

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -746,9 +746,9 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
746746
; GFX9-NEXT: s_mov_b64 exec, 0
747747
; GFX9-NEXT: .LBB6_6: ; %.continue1
748748
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
749-
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3c00
750-
; GFX9-NEXT: v_bfrev_b32_e32 v1, 60
751-
; GFX9-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
749+
; GFX9-NEXT: v_bfrev_b32_e32 v0, 60
750+
; GFX9-NEXT: v_mov_b32_e32 v1, 0x3c00
751+
; GFX9-NEXT: exp mrt0 v1, v1, v0, v0 done compr vm
752752
; GFX9-NEXT: s_endpgm
753753
; GFX9-NEXT: .LBB6_7:
754754
; GFX9-NEXT: s_mov_b64 exec, 0
@@ -792,9 +792,9 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
792792
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
793793
; GFX10-32-NEXT: .LBB6_6: ; %.continue1
794794
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
795-
; GFX10-32-NEXT: v_mov_b32_e32 v0, 0x3c00
796-
; GFX10-32-NEXT: v_bfrev_b32_e32 v1, 60
797-
; GFX10-32-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
795+
; GFX10-32-NEXT: v_bfrev_b32_e32 v0, 60
796+
; GFX10-32-NEXT: v_mov_b32_e32 v1, 0x3c00
797+
; GFX10-32-NEXT: exp mrt0 v1, v1, v0, v0 done compr vm
798798
; GFX10-32-NEXT: s_endpgm
799799
; GFX10-32-NEXT: .LBB6_7:
800800
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
@@ -838,9 +838,9 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
838838
; GFX10-64-NEXT: s_mov_b64 exec, 0
839839
; GFX10-64-NEXT: .LBB6_6: ; %.continue1
840840
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
841-
; GFX10-64-NEXT: v_mov_b32_e32 v0, 0x3c00
842-
; GFX10-64-NEXT: v_bfrev_b32_e32 v1, 60
843-
; GFX10-64-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
841+
; GFX10-64-NEXT: v_bfrev_b32_e32 v0, 60
842+
; GFX10-64-NEXT: v_mov_b32_e32 v1, 0x3c00
843+
; GFX10-64-NEXT: exp mrt0 v1, v1, v0, v0 done compr vm
844844
; GFX10-64-NEXT: s_endpgm
845845
; GFX10-64-NEXT: .LBB6_7:
846846
; GFX10-64-NEXT: s_mov_b64 exec, 0
@@ -1005,9 +1005,9 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
10051005
; GFX9-NEXT: .LBB7_8: ; %.return
10061006
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
10071007
; GFX9-NEXT: s_and_b64 exec, exec, s[0:1]
1008-
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3c00
1009-
; GFX9-NEXT: v_bfrev_b32_e32 v1, 60
1010-
; GFX9-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
1008+
; GFX9-NEXT: v_bfrev_b32_e32 v0, 60
1009+
; GFX9-NEXT: v_mov_b32_e32 v1, 0x3c00
1010+
; GFX9-NEXT: exp mrt0 v1, v1, v0, v0 done compr vm
10111011
; GFX9-NEXT: s_endpgm
10121012
; GFX9-NEXT: .LBB7_9:
10131013
; GFX9-NEXT: s_mov_b64 exec, 0
@@ -1068,9 +1068,9 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
10681068
; GFX10-32-NEXT: .LBB7_8: ; %.return
10691069
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
10701070
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s0
1071-
; GFX10-32-NEXT: v_mov_b32_e32 v0, 0x3c00
1072-
; GFX10-32-NEXT: v_bfrev_b32_e32 v1, 60
1073-
; GFX10-32-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
1071+
; GFX10-32-NEXT: v_bfrev_b32_e32 v0, 60
1072+
; GFX10-32-NEXT: v_mov_b32_e32 v1, 0x3c00
1073+
; GFX10-32-NEXT: exp mrt0 v1, v1, v0, v0 done compr vm
10741074
; GFX10-32-NEXT: s_endpgm
10751075
; GFX10-32-NEXT: .LBB7_9:
10761076
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
@@ -1131,9 +1131,9 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
11311131
; GFX10-64-NEXT: .LBB7_8: ; %.return
11321132
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
11331133
; GFX10-64-NEXT: s_and_b64 exec, exec, s[0:1]
1134-
; GFX10-64-NEXT: v_mov_b32_e32 v0, 0x3c00
1135-
; GFX10-64-NEXT: v_bfrev_b32_e32 v1, 60
1136-
; GFX10-64-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
1134+
; GFX10-64-NEXT: v_bfrev_b32_e32 v0, 60
1135+
; GFX10-64-NEXT: v_mov_b32_e32 v1, 0x3c00
1136+
; GFX10-64-NEXT: exp mrt0 v1, v1, v0, v0 done compr vm
11371137
; GFX10-64-NEXT: s_endpgm
11381138
; GFX10-64-NEXT: .LBB7_9:
11391139
; GFX10-64-NEXT: s_mov_b64 exec, 0

0 commit comments

Comments
 (0)