@@ -89,6 +89,10 @@ class AMDGPURegBankCombinerImpl : public Combiner {

   void applyCanonicalizeZextShiftAmt(MachineInstr &MI, MachineInstr &Ext) const;

+  bool combineD16Load(MachineInstr &MI) const;
+  bool applyD16Load(unsigned D16Opc, MachineInstr &DstMI,
+                    MachineInstr *SmallLoad, Register ToOverwriteD16) const;
+
 private:
   SIModeRegisterDefaults getMode() const;
   bool getIEEE() const;
@@ -392,6 +396,88 @@ void AMDGPURegBankCombinerImpl::applyCanonicalizeZextShiftAmt(
   MI.eraseFromParent();
 }

+bool AMDGPURegBankCombinerImpl::combineD16Load(MachineInstr &MI) const {
+  Register Dst;
+  MachineInstr *Load, *SextLoad;
+  const int64_t CleanLo16 = 0xFFFFFFFFFFFF0000;
+  const int64_t CleanHi16 = 0x000000000000FFFF;
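+  // CleanLo16 keeps only the high half of a 32-bit value (low 16 bits
+  // zeroed); CleanHi16 keeps only the low 16 bits.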
+
+  // Load lo
+  if (mi_match(MI.getOperand(1).getReg(), MRI,
+               m_GOr(m_GAnd(m_GBitcast(m_Reg(Dst)),
+                            m_Copy(m_SpecificICst(CleanLo16))),
+                     m_MInstr(Load)))) {
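+    // Matched (bitcast(Dst) & 0xFFFF0000) | Load: the value is Dst with its
+    // low 16 bits replaced by the loaded data, a candidate for a D16-lo load.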
+
+    if (Load->getOpcode() == AMDGPU::G_ZEXTLOAD) {
+      const MachineMemOperand *MMO = *Load->memoperands_begin();
+      unsigned LoadSize = MMO->getSizeInBits().getValue();
+      if (LoadSize == 8)
+        return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_U8, MI, Load, Dst);
+      if (LoadSize == 16)
+        return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO, MI, Load, Dst);
+      return false;
+    }
+
+    if (mi_match(
+            Load, MRI,
+            m_GAnd(m_MInstr(SextLoad), m_Copy(m_SpecificICst(CleanHi16))))) {
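+      // A sign-extending byte load reaches here as (sextload & 0xFFFF): the
+      // mask clears the extension bits above bit 15 before insertion.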
+      if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)
+        return false;
+
+      const MachineMemOperand *MMO = *SextLoad->memoperands_begin();
+      if (MMO->getSizeInBits().getValue() != 8)
+        return false;
+
+      return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_I8, MI, SextLoad, Dst);
+    }
+
+    return false;
+  }
+
+  // Load hi
+  if (mi_match(MI.getOperand(1).getReg(), MRI,
+               m_GOr(m_GAnd(m_GBitcast(m_Reg(Dst)),
+                            m_Copy(m_SpecificICst(CleanHi16))),
+                     m_GShl(m_MInstr(Load), m_Copy(m_SpecificICst(16)))))) {
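+    // Matched (bitcast(Dst) & 0xFFFF) | (Load << 16): the high 16 bits of
+    // Dst are replaced by the loaded data, a candidate for a D16-hi load.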
+
+    if (Load->getOpcode() == AMDGPU::G_ZEXTLOAD) {
+      const MachineMemOperand *MMO = *Load->memoperands_begin();
+      unsigned LoadSize = MMO->getSizeInBits().getValue();
+      if (LoadSize == 8)
+        return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_U8, MI, Load, Dst);
+      if (LoadSize == 16)
+        return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI, MI, Load, Dst);
+      return false;
+    }
+
+    if (mi_match(
+            Load, MRI,
+            m_GAnd(m_MInstr(SextLoad), m_Copy(m_SpecificICst(CleanHi16))))) {
+      if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)
+        return false;
+      const MachineMemOperand *MMO = *SextLoad->memoperands_begin();
+      if (MMO->getSizeInBits().getValue() != 8)
+        return false;
+
+      return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_I8, MI, SextLoad, Dst);
+    }
+
+    return false;
+  }
+
+  return false;
+}
+
+bool AMDGPURegBankCombinerImpl::applyD16Load(
+    unsigned D16Opc, MachineInstr &DstMI, MachineInstr *SmallLoad,
+    Register SrcReg32ToOverwriteD16) const {
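+  // Rewrite DstMI into a D16 load pseudo: its result takes the loaded value
+  // in one 16-bit half and the corresponding half of SrcReg32ToOverwriteD16
+  // in the other; the small load's memory operands are carried over.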
+  B.buildInstr(D16Opc, {DstMI.getOperand(0).getReg()},
+               {SmallLoad->getOperand(1).getReg(), SrcReg32ToOverwriteD16})
+      .setMemRefs(SmallLoad->memoperands());
+  DstMI.eraseFromParent();
+  return true;
+}
+
 SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
   return MF.getInfo<SIMachineFunctionInfo>()->getMode();
 }
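
For reference, a hand-written MIR sketch of the "load lo" rewrite this combine aims for. It is illustrative only: the register names, register banks, address space, and the surrounding G_BITCASTs are assumptions for the example, not taken from the patch or its tests.

    ; Before: the low half of %vec is replaced by a 16-bit zero-extending load.
    %cast:vgpr(s32) = G_BITCAST %vec(<2 x s16>)
    %mask:sgpr(s32) = G_CONSTANT i32 -65536
    %maskv:vgpr(s32) = COPY %mask(s32)
    %hi:vgpr(s32) = G_AND %cast, %maskv
    %ld:vgpr(s32) = G_ZEXTLOAD %ptr(p1) :: (load (s16), addrspace 1)
    %or:vgpr(s32) = G_OR %hi, %ld
    %res:vgpr(<2 x s16>) = G_BITCAST %or(s32)

    ; After: a single D16 load writes the low half and preserves the high
    ; half taken from %vec.
    %res:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO %ptr(p1), %vec :: (load (s16), addrspace 1)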