Skip to content

Commit a6a72df

Browse files
committed
[AMDGPU][GlobalISel] Avoid selecting S_PACK with constants
If constants are hidden behind G_ANYEXT, we can treat them the same way as G_SEXT. For that purpose, we extend getConstantVRegValWithLookThrough with an option to handle G_ANYEXT the same way as G_SEXT. Differential Revision: https://reviews.llvm.org/D92219
1 parent 2ab5fd2 commit a6a72df

File tree

5 files changed

+20
-20
lines changed

5 files changed

+20
-20
lines changed

llvm/include/llvm/CodeGen/GlobalISel/Utils.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,10 +146,13 @@ struct ValueAndVReg {
146146
/// When \p LookThroughInstrs == false this function behaves like
147147
/// getConstantVRegVal.
148148
/// When \p HandleFConstants == false the function bails on G_FCONSTANTs.
149+
/// When \p LookThroughAnyExt == true the function treats G_ANYEXT the same as
150+
/// G_SEXT.
149151
Optional<ValueAndVReg>
150152
getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI,
151153
bool LookThroughInstrs = true,
152-
bool HandleFConstants = true);
154+
bool HandleFConstants = true,
155+
bool LookThroughAnyExt = false);
153156
const ConstantFP* getConstantFPVRegVal(Register VReg,
154157
const MachineRegisterInfo &MRI);
155158

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg,
283283

284284
Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
285285
Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
286-
bool HandleFConstant) {
286+
bool HandleFConstant, bool LookThroughAnyExt) {
287287
SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
288288
MachineInstr *MI;
289289
auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) {
@@ -310,6 +310,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
310310
while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) &&
311311
LookThroughInstrs) {
312312
switch (MI->getOpcode()) {
313+
case TargetOpcode::G_ANYEXT:
314+
if (!LookThroughAnyExt)
315+
return None;
316+
LLVM_FALLTHROUGH;
313317
case TargetOpcode::G_TRUNC:
314318
case TargetOpcode::G_SEXT:
315319
case TargetOpcode::G_ZEXT:
@@ -343,6 +347,7 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
343347
case TargetOpcode::G_TRUNC:
344348
Val = Val.trunc(OpcodeAndSize.second);
345349
break;
350+
case TargetOpcode::G_ANYEXT:
346351
case TargetOpcode::G_SEXT:
347352
Val = Val.sext(OpcodeAndSize.second);
348353
break;

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -597,9 +597,11 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
597597
const DebugLoc &DL = MI.getDebugLoc();
598598
MachineBasicBlock *BB = MI.getParent();
599599

600-
auto ConstSrc1 = getConstantVRegValWithLookThrough(Src1, *MRI, true, true);
600+
auto ConstSrc1 =
601+
getConstantVRegValWithLookThrough(Src1, *MRI, true, true, true);
601602
if (ConstSrc1) {
602-
auto ConstSrc0 = getConstantVRegValWithLookThrough(Src0, *MRI, true, true);
603+
auto ConstSrc0 =
604+
getConstantVRegValWithLookThrough(Src0, *MRI, true, true, true);
603605
if (ConstSrc0) {
604606
const int64_t K0 = ConstSrc0->Value.getSExtValue();
605607
const int64_t K1 = ConstSrc1->Value.getSExtValue();

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -579,10 +579,8 @@ body: |
579579
bb.0:
580580
581581
; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_anyext_constant_anyext_constant
582-
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
583-
; GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 456
584-
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[S_MOV_B32_1]]
585-
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
582+
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 29884539
583+
; GFX9: S_ENDPGM 0, implicit [[S_MOV_B32_]]
586584
%0:sgpr(s16) = G_CONSTANT i16 123
587585
%1:sgpr(s16) = G_CONSTANT i16 456
588586
%2:sgpr(s32) = G_ANYEXT %0

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -116,18 +116,14 @@ define float @v_fdot2_inline_literal_a(<2 x half> %b, float %c) {
116116
; GFX906-LABEL: v_fdot2_inline_literal_a:
117117
; GFX906: ; %bb.0:
118118
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119-
; GFX906-NEXT: s_movk_i32 s4, 0x4000
120-
; GFX906-NEXT: s_pack_ll_b32_b16 s4, s4, s4
121-
; GFX906-NEXT: v_dot2_f32_f16 v0, s4, v0, v1
119+
; GFX906-NEXT: v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
122120
; GFX906-NEXT: s_setpc_b64 s[30:31]
123121
;
124122
; GFX10-LABEL: v_fdot2_inline_literal_a:
125123
; GFX10: ; %bb.0:
126124
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127125
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
128-
; GFX10-NEXT: s_movk_i32 s4, 0x4000
129-
; GFX10-NEXT: s_pack_ll_b32_b16 s4, s4, s4
130-
; GFX10-NEXT: v_dot2_f32_f16 v0, s4, v0, v1
126+
; GFX10-NEXT: v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
131127
; GFX10-NEXT: s_setpc_b64 s[30:31]
132128
%ret = tail call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %b, float %c, i1 false)
133129
ret float %ret
@@ -137,18 +133,14 @@ define float @v_fdot2_inline_literal_b(<2 x half> %a, float %c) {
137133
; GFX906-LABEL: v_fdot2_inline_literal_b:
138134
; GFX906: ; %bb.0:
139135
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140-
; GFX906-NEXT: s_movk_i32 s4, 0x4000
141-
; GFX906-NEXT: s_pack_ll_b32_b16 s4, s4, s4
142-
; GFX906-NEXT: v_dot2_f32_f16 v0, v0, s4, v1
136+
; GFX906-NEXT: v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
143137
; GFX906-NEXT: s_setpc_b64 s[30:31]
144138
;
145139
; GFX10-LABEL: v_fdot2_inline_literal_b:
146140
; GFX10: ; %bb.0:
147141
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148142
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
149-
; GFX10-NEXT: s_movk_i32 s4, 0x4000
150-
; GFX10-NEXT: s_pack_ll_b32_b16 s4, s4, s4
151-
; GFX10-NEXT: v_dot2_f32_f16 v0, v0, s4, v1
143+
; GFX10-NEXT: v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
152144
; GFX10-NEXT: s_setpc_b64 s[30:31]
153145
%ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> <half 2.0, half 2.0>, float %c, i1 false)
154146
ret float %ret

0 commit comments

Comments
 (0)