@@ -494,6 +494,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
494494 return false ;
495495
496496 const unsigned Size = Ty.getSizeInBits ();
497+ if (Ty.isPointerVector ())
498+ return true ;
497499 if (Size <= 64 )
498500 return false ;
499501 // Address space 8 pointers get their own workaround.
@@ -502,9 +504,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
502504 if (!Ty.isVector ())
503505 return true ;
504506
505- if (Ty.isPointerVector ())
506- return true ;
507-
508507 unsigned EltSize = Ty.getScalarSizeInBits ();
509508 return EltSize != 32 && EltSize != 64 ;
510509}
@@ -5818,8 +5817,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
58185817 return Reg;
58195818}
58205819
5821- Register AMDGPULegalizerInfo::fixStoreSourceType (
5822- MachineIRBuilder &B, Register VData, bool IsFormat) const {
5820+ Register AMDGPULegalizerInfo::fixStoreSourceType (MachineIRBuilder &B,
5821+ Register VData, LLT MemTy,
5822+ bool IsFormat) const {
58235823 MachineRegisterInfo *MRI = B.getMRI ();
58245824 LLT Ty = MRI->getType (VData);
58255825
@@ -5829,6 +5829,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58295829 if (hasBufferRsrcWorkaround (Ty))
58305830 return castBufferRsrcToV4I32 (VData, B);
58315831
5832+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
5833+ Ty = getBitcastRegisterType (Ty);
5834+ VData = B.buildBitcast (Ty, VData).getReg (0 );
5835+ }
58325836 // Fixup illegal register types for i8 stores.
58335837 if (Ty == LLT::scalar (8 ) || Ty == S16) {
58345838 Register AnyExt = B.buildAnyExt (LLT::scalar (32 ), VData).getReg (0 );
@@ -5846,22 +5850,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58465850}
58475851
58485852bool AMDGPULegalizerInfo::legalizeBufferStore (MachineInstr &MI,
5849- MachineRegisterInfo &MRI,
5850- MachineIRBuilder &B,
5853+ LegalizerHelper &Helper,
58515854 bool IsTyped,
58525855 bool IsFormat) const {
5856+ MachineIRBuilder &B = Helper.MIRBuilder ;
5857+ MachineRegisterInfo &MRI = *B.getMRI ();
5858+
58535859 Register VData = MI.getOperand (1 ).getReg ();
58545860 LLT Ty = MRI.getType (VData);
58555861 LLT EltTy = Ty.getScalarType ();
58565862 const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
58575863 const LLT S32 = LLT::scalar (32 );
58585864
5859- VData = fixStoreSourceType (B, VData, IsFormat);
5860- castBufferRsrcArgToV4I32 (MI, B, 2 );
5861- Register RSrc = MI.getOperand (2 ).getReg ();
5862-
58635865 MachineMemOperand *MMO = *MI.memoperands_begin ();
58645866 const int MemSize = MMO->getSize ().getValue ();
5867+ LLT MemTy = MMO->getMemoryType ();
5868+
5869+ VData = fixStoreSourceType (B, VData, MemTy, IsFormat);
5870+
5871+ castBufferRsrcArgToV4I32 (MI, B, 2 );
5872+ Register RSrc = MI.getOperand (2 ).getReg ();
58655873
58665874 unsigned ImmOffset;
58675875
@@ -5954,10 +5962,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
59545962}
59555963
59565964bool AMDGPULegalizerInfo::legalizeBufferLoad (MachineInstr &MI,
5957- MachineRegisterInfo &MRI,
5958- MachineIRBuilder &B,
5965+ LegalizerHelper &Helper,
59595966 bool IsFormat,
59605967 bool IsTyped) const {
5968+ MachineIRBuilder &B = Helper.MIRBuilder ;
5969+ MachineRegisterInfo &MRI = *B.getMRI ();
5970+ GISelChangeObserver &Observer = Helper.Observer ;
5971+
59615972 // FIXME: Verifier should enforce 1 MMO for these intrinsics.
59625973 MachineMemOperand *MMO = *MI.memoperands_begin ();
59635974 const LLT MemTy = MMO->getMemoryType ();
@@ -6006,9 +6017,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
60066017 // Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
60076018 // logic doesn't have to handle that case.
60086019 if (hasBufferRsrcWorkaround (Ty)) {
6020+ Observer.changingInstr (MI);
60096021 Ty = castBufferRsrcFromV4I32 (MI, B, MRI, 0 );
6022+ Observer.changedInstr (MI);
60106023 Dst = MI.getOperand (0 ).getReg ();
6024+ B.setInsertPt (B.getMBB (), MI);
60116025 }
6026+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
6027+ Ty = getBitcastRegisterType (Ty);
6028+ Observer.changingInstr (MI);
6029+ Helper.bitcastDst (MI, Ty, 0 );
6030+ Observer.changedInstr (MI);
6031+ Dst = MI.getOperand (0 ).getReg ();
6032+ B.setInsertPt (B.getMBB (), MI);
6033+ }
6034+
60126035 LLT EltTy = Ty.getScalarType ();
60136036 const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
60146037 const bool Unpacked = ST.hasUnpackedD16VMem ();
@@ -7388,17 +7411,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73887411 case Intrinsic::amdgcn_raw_ptr_buffer_store:
73897412 case Intrinsic::amdgcn_struct_buffer_store:
73907413 case Intrinsic::amdgcn_struct_ptr_buffer_store:
7391- return legalizeBufferStore (MI, MRI, B , false , false );
7414+ return legalizeBufferStore (MI, Helper , false , false );
73927415 case Intrinsic::amdgcn_raw_buffer_store_format:
73937416 case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
73947417 case Intrinsic::amdgcn_struct_buffer_store_format:
73957418 case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
7396- return legalizeBufferStore (MI, MRI, B , false , true );
7419+ return legalizeBufferStore (MI, Helper , false , true );
73977420 case Intrinsic::amdgcn_raw_tbuffer_store:
73987421 case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
73997422 case Intrinsic::amdgcn_struct_tbuffer_store:
74007423 case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
7401- return legalizeBufferStore (MI, MRI, B , true , true );
7424+ return legalizeBufferStore (MI, Helper , true , true );
74027425 case Intrinsic::amdgcn_raw_buffer_load:
74037426 case Intrinsic::amdgcn_raw_ptr_buffer_load:
74047427 case Intrinsic::amdgcn_raw_atomic_buffer_load:
@@ -7407,17 +7430,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
74077430 case Intrinsic::amdgcn_struct_ptr_buffer_load:
74087431 case Intrinsic::amdgcn_struct_atomic_buffer_load:
74097432 case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
7410- return legalizeBufferLoad (MI, MRI, B , false , false );
7433+ return legalizeBufferLoad (MI, Helper , false , false );
74117434 case Intrinsic::amdgcn_raw_buffer_load_format:
74127435 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
74137436 case Intrinsic::amdgcn_struct_buffer_load_format:
74147437 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
7415- return legalizeBufferLoad (MI, MRI, B , true , false );
7438+ return legalizeBufferLoad (MI, Helper , true , false );
74167439 case Intrinsic::amdgcn_raw_tbuffer_load:
74177440 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
74187441 case Intrinsic::amdgcn_struct_tbuffer_load:
74197442 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
7420- return legalizeBufferLoad (MI, MRI, B , true , true );
7443+ return legalizeBufferLoad (MI, Helper , true , true );
74217444 case Intrinsic::amdgcn_raw_buffer_atomic_swap:
74227445 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
74237446 case Intrinsic::amdgcn_struct_buffer_atomic_swap:
0 commit comments