diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index f0d1117664983..57d3eadfcc5ef 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1352,12 +1352,38 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
                                               DataRC1, SubReg);
     }
 
-    if (!MRI->constrainRegClass(Data0->getReg(), DataRC0) ||
-        !MRI->constrainRegClass(Data1->getReg(), DataRC1))
+    // Physical registers can neither be constrained nor rewritten to read a
+    // copy here, so the pair is left unmerged.
+    if (Data0->getReg().isPhysical() || Data1->getReg().isPhysical())
       return nullptr;
-
-    // TODO: If one register can be constrained, and not the other, insert a
-    // copy.
+
+    const bool CanConstrainData0 =
+        MRI->constrainRegClass(Data0->getReg(), DataRC0);
+    const bool CanConstrainData1 =
+        MRI->constrainRegClass(Data1->getReg(), DataRC1);
+    if (!CanConstrainData0 && !CanConstrainData1)
+      return nullptr;
+
+    if (!CanConstrainData0 || !CanConstrainData1) {
+      // Exactly one data register could not be constrained. Copy it into a
+      // fresh virtual register of the class required for that operand
+      // (DataRC0 or DataRC1) and make the store read the copy instead.
+      const CombineInfo &ActiveCI = CanConstrainData0 ? Paired : CI;
+      const TargetRegisterClass *CopyRC =
+          CanConstrainData0 ? DataRC1 : DataRC0;
+      MachineOperand *ActiveData =
+          TII->getNamedOperand(*ActiveCI.I, AMDGPU::OpName::data0);
+
+      // Insert the copy directly in front of the instruction whose operand is
+      // rewritten, so the copy never reads its source ahead of that use.
+      MachineBasicBlock *MBB = ActiveCI.I->getParent();
+      const DebugLoc DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(),
+                                                      Paired.I->getDebugLoc());
+      const Register CopyReg = MRI->createVirtualRegister(CopyRC);
+      BuildMI(*MBB, ActiveCI.I, DL, TII->get(TargetOpcode::COPY), CopyReg)
+          .addReg(ActiveData->getReg());
+      ActiveData->setReg(CopyReg);
+    }
   }
 
   return Where;
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 33f210533e10b..34efcadc2951d 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -206,5 +206,46 @@ body: |
     %2:av_64_align2 = COPY $vgpr4_vgpr5
     DS_WRITE_B64_gfx9 %0, %1, 512, 0, implicit $exec :: (store (s64), addrspace 3)
     DS_WRITE_B64_gfx9 %0, %2, 1536, 0, implicit $exec :: (store (s64), addrspace 3)
+...
+
+# The second store reads a physical register; both stores must stay unmerged.
+---
+name: ds_write_b32__av32_physical
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-LABEL: name: ds_write_b32__av32_physical
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %0:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: %1:av_32 = COPY $vgpr1
+    ; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
+    ; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:av_32 = COPY $vgpr1
+    DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
+...
+
+# The first store reads a physical register; both stores must stay unmerged.
+---
+name: ds_write_b32__physical_av32
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: ds_write_b32__physical_av32
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %0:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: %1:av_32 = COPY $vgpr2
+    ; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
+    ; CHECK-NEXT: DS_WRITE_B32_gfx9 %0, %1, 96, 0, implicit $exec :: (store (s32), addrspace 3)
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:av_32 = COPY $vgpr2
+    DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
+    DS_WRITE_B32_gfx9 %0, %1, 96, 0, implicit $exec :: (store (s32), addrspace 3)
 ...
 