@@ -494,6 +494,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
494494 return false ;
495495
496496 const unsigned Size = Ty.getSizeInBits ();
497+ if (Ty.isPointerVector ())
498+ return true ;
497499 if (Size <= 64 )
498500 return false ;
499501 // Address space 8 pointers get their own workaround.
@@ -502,9 +504,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
502504 if (!Ty.isVector ())
503505 return true ;
504506
505- if (Ty.isPointerVector ())
506- return true ;
507-
508507 unsigned EltSize = Ty.getScalarSizeInBits ();
509508 return EltSize != 32 && EltSize != 64 ;
510509}
@@ -5794,8 +5793,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
57945793 return Reg;
57955794}
57965795
5797- Register AMDGPULegalizerInfo::fixStoreSourceType (
5798- MachineIRBuilder &B, Register VData, bool IsFormat) const {
5796+ Register AMDGPULegalizerInfo::fixStoreSourceType (MachineIRBuilder &B,
5797+ Register VData, LLT MemTy,
5798+ bool IsFormat) const {
57995799 MachineRegisterInfo *MRI = B.getMRI ();
58005800 LLT Ty = MRI->getType (VData);
58015801
@@ -5805,6 +5805,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58055805 if (hasBufferRsrcWorkaround (Ty))
58065806 return castBufferRsrcToV4I32 (VData, B);
58075807
5808+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
5809+ Ty = getBitcastRegisterType (Ty);
5810+ VData = B.buildBitcast (Ty, VData).getReg (0 );
5811+ }
58085812 // Fixup illegal register types for i8 stores.
58095813 if (Ty == LLT::scalar (8 ) || Ty == S16) {
58105814 Register AnyExt = B.buildAnyExt (LLT::scalar (32 ), VData).getReg (0 );
@@ -5822,22 +5826,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58225826}
58235827
58245828bool AMDGPULegalizerInfo::legalizeBufferStore (MachineInstr &MI,
5825- MachineRegisterInfo &MRI,
5826- MachineIRBuilder &B,
5829+ LegalizerHelper &Helper,
58275830 bool IsTyped,
58285831 bool IsFormat) const {
5832+ MachineIRBuilder &B = Helper.MIRBuilder ;
5833+ MachineRegisterInfo &MRI = *B.getMRI ();
5834+
58295835 Register VData = MI.getOperand (1 ).getReg ();
58305836 LLT Ty = MRI.getType (VData);
58315837 LLT EltTy = Ty.getScalarType ();
58325838 const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
58335839 const LLT S32 = LLT::scalar (32 );
58345840
5835- VData = fixStoreSourceType (B, VData, IsFormat);
5836- castBufferRsrcArgToV4I32 (MI, B, 2 );
5837- Register RSrc = MI.getOperand (2 ).getReg ();
5838-
58395841 MachineMemOperand *MMO = *MI.memoperands_begin ();
58405842 const int MemSize = MMO->getSize ().getValue ();
5843+ LLT MemTy = MMO->getMemoryType ();
5844+
5845+ VData = fixStoreSourceType (B, VData, MemTy, IsFormat);
5846+
5847+ castBufferRsrcArgToV4I32 (MI, B, 2 );
5848+ Register RSrc = MI.getOperand (2 ).getReg ();
58415849
58425850 unsigned ImmOffset;
58435851
@@ -5930,10 +5938,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
59305938}
59315939
59325940bool AMDGPULegalizerInfo::legalizeBufferLoad (MachineInstr &MI,
5933- MachineRegisterInfo &MRI,
5934- MachineIRBuilder &B,
5941+ LegalizerHelper &Helper,
59355942 bool IsFormat,
59365943 bool IsTyped) const {
5944+ MachineIRBuilder &B = Helper.MIRBuilder ;
5945+ MachineRegisterInfo &MRI = *B.getMRI ();
5946+ GISelChangeObserver &Observer = Helper.Observer ;
5947+
59375948 // FIXME: Verifier should enforce 1 MMO for these intrinsics.
59385949 MachineMemOperand *MMO = *MI.memoperands_begin ();
59395950 const LLT MemTy = MMO->getMemoryType ();
@@ -5982,9 +5993,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
59825993 // Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
59835994 // logic doesn't have to handle that case.
59845995 if (hasBufferRsrcWorkaround (Ty)) {
5996+ Observer.changingInstr (MI);
59855997 Ty = castBufferRsrcFromV4I32 (MI, B, MRI, 0 );
5998+ Observer.changedInstr (MI);
59865999 Dst = MI.getOperand (0 ).getReg ();
6000+ B.setInsertPt (B.getMBB (), MI);
59876001 }
6002+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
6003+ Ty = getBitcastRegisterType (Ty);
6004+ Observer.changingInstr (MI);
6005+ Helper.bitcastDst (MI, Ty, 0 );
6006+ Observer.changedInstr (MI);
6007+ Dst = MI.getOperand (0 ).getReg ();
6008+ B.setInsertPt (B.getMBB (), MI);
6009+ }
6010+
59886011 LLT EltTy = Ty.getScalarType ();
59896012 const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
59906013 const bool Unpacked = ST.hasUnpackedD16VMem ();
@@ -7364,17 +7387,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73647387 case Intrinsic::amdgcn_raw_ptr_buffer_store:
73657388 case Intrinsic::amdgcn_struct_buffer_store:
73667389 case Intrinsic::amdgcn_struct_ptr_buffer_store:
7367- return legalizeBufferStore (MI, MRI, B , false , false );
7390+ return legalizeBufferStore (MI, Helper , false , false );
73687391 case Intrinsic::amdgcn_raw_buffer_store_format:
73697392 case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
73707393 case Intrinsic::amdgcn_struct_buffer_store_format:
73717394 case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
7372- return legalizeBufferStore (MI, MRI, B , false , true );
7395+ return legalizeBufferStore (MI, Helper , false , true );
73737396 case Intrinsic::amdgcn_raw_tbuffer_store:
73747397 case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
73757398 case Intrinsic::amdgcn_struct_tbuffer_store:
73767399 case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
7377- return legalizeBufferStore (MI, MRI, B , true , true );
7400+ return legalizeBufferStore (MI, Helper , true , true );
73787401 case Intrinsic::amdgcn_raw_buffer_load:
73797402 case Intrinsic::amdgcn_raw_ptr_buffer_load:
73807403 case Intrinsic::amdgcn_raw_atomic_buffer_load:
@@ -7383,17 +7406,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73837406 case Intrinsic::amdgcn_struct_ptr_buffer_load:
73847407 case Intrinsic::amdgcn_struct_atomic_buffer_load:
73857408 case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
7386- return legalizeBufferLoad (MI, MRI, B , false , false );
7409+ return legalizeBufferLoad (MI, Helper , false , false );
73877410 case Intrinsic::amdgcn_raw_buffer_load_format:
73887411 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
73897412 case Intrinsic::amdgcn_struct_buffer_load_format:
73907413 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
7391- return legalizeBufferLoad (MI, MRI, B , true , false );
7414+ return legalizeBufferLoad (MI, Helper , true , false );
73927415 case Intrinsic::amdgcn_raw_tbuffer_load:
73937416 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
73947417 case Intrinsic::amdgcn_struct_tbuffer_load:
73957418 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
7396- return legalizeBufferLoad (MI, MRI, B , true , true );
7419+ return legalizeBufferLoad (MI, Helper , true , true );
73977420 case Intrinsic::amdgcn_raw_buffer_atomic_swap:
73987421 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
73997422 case Intrinsic::amdgcn_struct_buffer_atomic_swap:
0 commit comments