|
23 | 23 | #include "llvm/CodeGen/MachineInstr.h" |
24 | 24 | #include "llvm/CodeGen/MachineUniformityAnalysis.h" |
25 | 25 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
| 26 | +#include "llvm/Support/AMDGPUAddrSpace.h" |
26 | 27 |
|
27 | 28 | #define DEBUG_TYPE "amdgpu-regbanklegalize" |
28 | 29 |
|
@@ -286,6 +287,25 @@ void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) { |
286 | 287 | MI.eraseFromParent(); |
287 | 288 | } |
288 | 289 |
|
| 290 | +void RegBankLegalizeHelper::lowerSplitTo32Select(MachineInstr &MI) { |
| 291 | + Register Dst = MI.getOperand(0).getReg(); |
| 292 | + LLT DstTy = MRI.getType(Dst); |
| 293 | + assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64 || |
| 294 | + (DstTy.isPointer() && DstTy.getSizeInBits() == 64)); |
| 295 | + LLT Ty = DstTy == V4S16 ? V2S16 : S32; |
| 296 | + auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg()); |
| 297 | + auto Op3 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(3).getReg()); |
| 298 | + Register Cond = MI.getOperand(1).getReg(); |
| 299 | + auto Flags = MI.getFlags(); |
| 300 | + auto Lo = |
| 301 | + B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(0), Op3.getReg(0), Flags); |
| 302 | + auto Hi = |
| 303 | + B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(1), Op3.getReg(1), Flags); |
| 304 | + |
| 305 | + B.buildMergeLikeInstr(Dst, {Lo, Hi}); |
| 306 | + MI.eraseFromParent(); |
| 307 | +} |
| 308 | + |
289 | 309 | void RegBankLegalizeHelper::lower(MachineInstr &MI, |
290 | 310 | const RegBankLLTMapping &Mapping, |
291 | 311 | SmallSet<Register, 4> &WaterfallSgprs) { |
@@ -372,6 +392,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI, |
372 | 392 | return lowerS_BFE(MI); |
373 | 393 | case SplitTo32: |
374 | 394 | return lowerSplitTo32(MI); |
| 395 | + case SplitTo32Select: |
| 396 | + return lowerSplitTo32Select(MI); |
375 | 397 | case SplitLoad: { |
376 | 398 | LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); |
377 | 399 | unsigned Size = DstTy.getSizeInBits(); |
@@ -485,7 +507,8 @@ LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) { |
485 | 507 | case UniInVgprB64: |
486 | 508 | if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) || |
487 | 509 | Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(0, 64) || |
488 | | - Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64)) |
| 510 | + Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64) || |
| 511 | + (Ty.isPointer() && Ty.getAddressSpace() > AMDGPUAS::MAX_AMDGPU_ADDRESS)) |
489 | 512 | return Ty; |
490 | 513 | return LLT(); |
491 | 514 | case SgprB96: |
|
0 commit comments