Skip to content
30 changes: 25 additions & 5 deletions llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1352,12 +1352,32 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
DataRC1, SubReg);
}

if (!MRI->constrainRegClass(Data0->getReg(), DataRC0) ||
!MRI->constrainRegClass(Data1->getReg(), DataRC1))
if (Data0->getReg().isPhysical() || Data1->getReg().isPhysical()) {
return nullptr;

// TODO: If one register can be constrained, and not the other, insert a
// copy.
}
bool canBeConstrainedData0 =
MRI->constrainRegClass(Data0->getReg(), DataRC0);
bool canBeConstrainedData1 =
MRI->constrainRegClass(Data1->getReg(), DataRC1);
if (!canBeConstrainedData0 && !canBeConstrainedData1) {
return nullptr;
}
if (!canBeConstrainedData0 || !canBeConstrainedData1) {
MachineBasicBlock::iterator InsertBefore = CI.I;
MachineBasicBlock *MBB = CI.I->getParent();
const DebugLoc &DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(),
Paired.I->getDebugLoc());
const CombineInfo &ActiveCI = canBeConstrainedData0 ? Paired : CI;
MachineOperand *activeData =
TII->getNamedOperand(*ActiveCI.I, AMDGPU::OpName::data0);
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
const TargetRegisterClass *RC = getDataRegClass(*CI.I);
Register BaseReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, InsertBefore, DL, CopyDesc, BaseReg)
.addReg(activeData->getReg(), 0);

activeData->setReg(BaseReg);
}
}

return Where;
Expand Down
39 changes: 39 additions & 0 deletions llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
Original file line number Diff line number Diff line change
Expand Up @@ -206,5 +206,44 @@ body: |
%2:av_64_align2 = COPY $vgpr4_vgpr5
DS_WRITE_B64_gfx9 %0, %1, 512, 0, implicit $exec :: (store (s64), addrspace 3)
DS_WRITE_B64_gfx9 %0, %2, 1536, 0, implicit $exec :: (store (s64), addrspace 3)
...

---
# Two DS_WRITE_B32_gfx9 stores share the address register %0. The first
# store's data operand is the virtual register %1 (class av_32); the second
# store's data operand is the physical register $vgpr2.
# The CHECK lines expect both stores to appear in the output exactly as in
# the input, i.e. the pair is left uncombined.
# NOTE(review): presumably this pins the load/store optimizer's bail-out when
# a data operand is a physical register - confirm against the pass.
name: ds_write_b32__av32_physical
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2

; CHECK-LABEL: name: ds_write_b32__av32_physical
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %0:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: %1:av_32 = COPY $vgpr1
; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3)

%0:vgpr_32 = COPY $vgpr0
%1:av_32 = COPY $vgpr1
DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
...

---
# Two DS_WRITE_B32_gfx9 stores share the address register %0. Here the
# operand kinds are swapped relative to an av_32-then-physical ordering: the
# first store's data operand is the physical register $vgpr1, and the second
# store's data operand is the virtual register %1 (class av_32).
# The CHECK lines expect both stores to appear in the output exactly as in
# the input, i.e. the pair is left uncombined.
# NOTE(review): presumably this pins the load/store optimizer's bail-out when
# a data operand is a physical register - confirm against the pass.
name: ds_write_b32__physical_av32
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2

; CHECK-LABEL: name: ds_write_b32__physical_av32
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %0:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: %1:av_32 = COPY $vgpr2
; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, %1, 96, 0, implicit $exec :: (store (s32), addrspace 3)

%0:vgpr_32 = COPY $vgpr0
%1:av_32 = COPY $vgpr2
DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
DS_WRITE_B32_gfx9 %0, %1, 96, 0, implicit $exec :: (store (s32), addrspace 3)
...