@@ -492,6 +492,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
492492 return false ;
493493
494494 const unsigned Size = Ty.getSizeInBits ();
495+ if (Ty.isPointerVector ())
496+ return true ;
495497 if (Size <= 64 )
496498 return false ;
497499 // Address space 8 pointers get their own workaround.
@@ -500,9 +502,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
500502 if (!Ty.isVector ())
501503 return true ;
502504
503- if (Ty.isPointerVector ())
504- return true ;
505-
506505 unsigned EltSize = Ty.getScalarSizeInBits ();
507506 return EltSize != 32 && EltSize != 64 ;
508507}
@@ -5809,8 +5808,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
58095808 return Reg;
58105809}
58115810
5812- Register AMDGPULegalizerInfo::fixStoreSourceType (
5813- MachineIRBuilder &B, Register VData, bool IsFormat) const {
5811+ Register AMDGPULegalizerInfo::fixStoreSourceType (MachineIRBuilder &B,
5812+ Register VData, LLT MemTy,
5813+ bool IsFormat) const {
58145814 MachineRegisterInfo *MRI = B.getMRI ();
58155815 LLT Ty = MRI->getType (VData);
58165816
@@ -5820,6 +5820,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58205820 if (hasBufferRsrcWorkaround (Ty))
58215821 return castBufferRsrcToV4I32 (VData, B);
58225822
5823+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
5824+ Ty = getBitcastRegisterType (Ty);
5825+ VData = B.buildBitcast (Ty, VData).getReg (0 );
5826+ }
58235827 // Fixup illegal register types for i8 stores.
58245828 if (Ty == LLT::scalar (8 ) || Ty == S16) {
58255829 Register AnyExt = B.buildAnyExt (LLT::scalar (32 ), VData).getReg (0 );
@@ -5837,22 +5841,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58375841}
58385842
58395843bool AMDGPULegalizerInfo::legalizeBufferStore (MachineInstr &MI,
5840- MachineRegisterInfo &MRI,
5841- MachineIRBuilder &B,
5844+ LegalizerHelper &Helper,
58425845 bool IsTyped,
58435846 bool IsFormat) const {
5847+ MachineIRBuilder &B = Helper.MIRBuilder ;
5848+ MachineRegisterInfo &MRI = *B.getMRI ();
5849+
58445850 Register VData = MI.getOperand (1 ).getReg ();
58455851 LLT Ty = MRI.getType (VData);
58465852 LLT EltTy = Ty.getScalarType ();
58475853 const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
58485854 const LLT S32 = LLT::scalar (32 );
58495855
5850- VData = fixStoreSourceType (B, VData, IsFormat);
5851- castBufferRsrcArgToV4I32 (MI, B, 2 );
5852- Register RSrc = MI.getOperand (2 ).getReg ();
5853-
58545856 MachineMemOperand *MMO = *MI.memoperands_begin ();
58555857 const int MemSize = MMO->getSize ().getValue ();
5858+ LLT MemTy = MMO->getMemoryType ();
5859+
5860+ VData = fixStoreSourceType (B, VData, MemTy, IsFormat);
5861+
5862+ castBufferRsrcArgToV4I32 (MI, B, 2 );
5863+ Register RSrc = MI.getOperand (2 ).getReg ();
58565864
58575865 unsigned ImmOffset;
58585866
@@ -5945,10 +5953,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
59455953}
59465954
59475955bool AMDGPULegalizerInfo::legalizeBufferLoad (MachineInstr &MI,
5948- MachineRegisterInfo &MRI,
5949- MachineIRBuilder &B,
5956+ LegalizerHelper &Helper,
59505957 bool IsFormat,
59515958 bool IsTyped) const {
5959+ MachineIRBuilder &B = Helper.MIRBuilder ;
5960+ MachineRegisterInfo &MRI = *B.getMRI ();
5961+ GISelChangeObserver &Observer = Helper.Observer ;
5962+
59525963 // FIXME: Verifier should enforce 1 MMO for these intrinsics.
59535964 MachineMemOperand *MMO = *MI.memoperands_begin ();
59545965 const LLT MemTy = MMO->getMemoryType ();
@@ -5997,9 +6008,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
59976008 // Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
59986009 // logic doesn't have to handle that case.
59996010 if (hasBufferRsrcWorkaround (Ty)) {
6011+ Observer.changingInstr (MI);
60006012 Ty = castBufferRsrcFromV4I32 (MI, B, MRI, 0 );
6013+ Observer.changedInstr (MI);
60016014 Dst = MI.getOperand (0 ).getReg ();
6015+ B.setInsertPt (B.getMBB (), MI);
60026016 }
6017+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
6018+ Ty = getBitcastRegisterType (Ty);
6019+ Observer.changingInstr (MI);
6020+ Helper.bitcastDst (MI, Ty, 0 );
6021+ Observer.changedInstr (MI);
6022+ Dst = MI.getOperand (0 ).getReg ();
6023+ B.setInsertPt (B.getMBB (), MI);
6024+ }
6025+
60036026 LLT EltTy = Ty.getScalarType ();
60046027 const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
60056028 const bool Unpacked = ST.hasUnpackedD16VMem ();
@@ -7367,17 +7390,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73677390 case Intrinsic::amdgcn_raw_ptr_buffer_store:
73687391 case Intrinsic::amdgcn_struct_buffer_store:
73697392 case Intrinsic::amdgcn_struct_ptr_buffer_store:
7370- return legalizeBufferStore (MI, MRI, B , false , false );
7393+ return legalizeBufferStore (MI, Helper , false , false );
73717394 case Intrinsic::amdgcn_raw_buffer_store_format:
73727395 case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
73737396 case Intrinsic::amdgcn_struct_buffer_store_format:
73747397 case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
7375- return legalizeBufferStore (MI, MRI, B , false , true );
7398+ return legalizeBufferStore (MI, Helper , false , true );
73767399 case Intrinsic::amdgcn_raw_tbuffer_store:
73777400 case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
73787401 case Intrinsic::amdgcn_struct_tbuffer_store:
73797402 case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
7380- return legalizeBufferStore (MI, MRI, B , true , true );
7403+ return legalizeBufferStore (MI, Helper , true , true );
73817404 case Intrinsic::amdgcn_raw_buffer_load:
73827405 case Intrinsic::amdgcn_raw_ptr_buffer_load:
73837406 case Intrinsic::amdgcn_raw_atomic_buffer_load:
@@ -7386,17 +7409,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73867409 case Intrinsic::amdgcn_struct_ptr_buffer_load:
73877410 case Intrinsic::amdgcn_struct_atomic_buffer_load:
73887411 case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
7389- return legalizeBufferLoad (MI, MRI, B , false , false );
7412+ return legalizeBufferLoad (MI, Helper , false , false );
73907413 case Intrinsic::amdgcn_raw_buffer_load_format:
73917414 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
73927415 case Intrinsic::amdgcn_struct_buffer_load_format:
73937416 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
7394- return legalizeBufferLoad (MI, MRI, B , true , false );
7417+ return legalizeBufferLoad (MI, Helper , true , false );
73957418 case Intrinsic::amdgcn_raw_tbuffer_load:
73967419 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
73977420 case Intrinsic::amdgcn_struct_tbuffer_load:
73987421 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
7399- return legalizeBufferLoad (MI, MRI, B , true , true );
7422+ return legalizeBufferLoad (MI, Helper , true , true );
74007423 case Intrinsic::amdgcn_raw_buffer_atomic_swap:
74017424 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
74027425 case Intrinsic::amdgcn_struct_buffer_atomic_swap:
0 commit comments