Skip to content

Commit be2ae26

Browse files
authored
[AMDGPU] Record old VGPR MSBs in the high bits of s_set_vgpr_msb (llvm#165035)
Fixes: SWDEV-562450
1 parent 315dfe5 commit be2ae26

File tree

6 files changed

+104
-93
lines changed

6 files changed

+104
-93
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ class AMDGPULowerVGPREncoding {
5858
static constexpr unsigned BitsPerField = 2;
5959
static constexpr unsigned NumFields = 4;
6060
static constexpr unsigned FieldMask = (1 << BitsPerField) - 1;
61+
static constexpr unsigned ModeWidth = NumFields * BitsPerField;
62+
static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;
6163
using ModeType = PackedVector<unsigned, BitsPerField,
6264
std::bitset<BitsPerField * NumFields>>;
6365

@@ -152,13 +154,21 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
152154
CurrentMode |= NewMode;
153155
CurrentMask |= Mask;
154156

155-
MostRecentModeSet->getOperand(0).setImm(CurrentMode);
157+
MachineOperand &Op = MostRecentModeSet->getOperand(0);
158+
159+
// Carry old mode bits from the existing instruction.
160+
int64_t OldModeBits = Op.getImm() & (ModeMask << ModeWidth);
161+
162+
Op.setImm(CurrentMode | OldModeBits);
156163
return true;
157164
}
158165

166+
// Record previous mode into high 8 bits of the immediate.
167+
int64_t OldModeBits = CurrentMode << ModeWidth;
168+
159169
I = handleClause(I);
160-
MostRecentModeSet =
161-
BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB)).addImm(NewMode);
170+
MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
171+
.addImm(NewMode | OldModeBits);
162172

163173
CurrentMode = NewMode;
164174
CurrentMask = Mask;

llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
412412
*OutStreamer);
413413

414414
if (isVerbose() && MI->getOpcode() == AMDGPU::S_SET_VGPR_MSB) {
415-
unsigned V = MI->getOperand(0).getImm();
415+
unsigned V = MI->getOperand(0).getImm() & 0xff;
416416
OutStreamer->AddComment(
417417
" msbs: dst=" + Twine(V >> 6) + " src0=" + Twine(V & 3) +
418418
" src1=" + Twine((V >> 2) & 3) + " src2=" + Twine((V >> 4) & 3));

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ bool AMDGPUMCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
168168

169169
void AMDGPUMCInstrAnalysis::updateState(const MCInst &Inst, uint64_t Addr) {
170170
if (Inst.getOpcode() == AMDGPU::S_SET_VGPR_MSB_gfx12)
171-
VgprMSBs = Inst.getOperand(0).getImm();
171+
VgprMSBs = Inst.getOperand(0).getImm() & 0xff;
172172
else if (isTerminator(Inst))
173173
VgprMSBs = 0;
174174
}

llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ body: |
3636
; GCN-NEXT: v_add_f16_e64 v128.l /*v384.l*/, v129.l /*v385.l*/, v130.l /*v386.l*/
3737
$vgpr384_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr385_lo16, 0, undef $vgpr386_lo16, 0, 0, 0, implicit $exec, implicit $mode
3838
39-
; GCN-NEXT: s_set_vgpr_msb 0x8a
39+
; GCN-NEXT: s_set_vgpr_msb 0x458a
4040
; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=0
4141
; GCN-NEXT: v_add_f16_e64 v0.h /*v512.h*/, v1.h /*v513.h*/, v2.h /*v514.h*/
4242
$vgpr512_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr513_hi16, 0, undef $vgpr514_hi16, 0, 0, 0, implicit $exec, implicit $mode
@@ -50,7 +50,7 @@ body: |
5050
; GCN-NEXT: v_add_f16_e64 v128.l /*v640.l*/, v129.l /*v641.l*/, v130.l /*v642.l*/
5151
$vgpr640_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr641_lo16, 0, undef $vgpr642_lo16, 0, 0, 0, implicit $exec, implicit $mode
5252
53-
; GCN-NEXT: s_set_vgpr_msb 0xcf
53+
; GCN-NEXT: s_set_vgpr_msb 0x8acf
5454
; ASM-SAME: ; msbs: dst=3 src0=3 src1=3 src2=0
5555
; GCN-NEXT: v_add_f16_e64 v0.h /*v768.h*/, v1.h /*v769.h*/, v2.h /*v770.h*/
5656
$vgpr768_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr769_hi16, 0, undef $vgpr770_hi16, 0, 0, 0, implicit $exec, implicit $mode

0 commit comments

Comments
 (0)