@@ -87,6 +87,8 @@ enum InstClassEnum {
8787 GLOBAL_STORE_SADDR,
8888 FLAT_LOAD,
8989 FLAT_STORE,
90+ FLAT_LOAD_SADDR,
91+ FLAT_STORE_SADDR,
9092 GLOBAL_LOAD, // GLOBAL_LOAD/GLOBAL_STORE are never used as the InstClass of
9193 GLOBAL_STORE // any CombineInfo, they are only ever returned by
9294 // getCommonInstClass.
@@ -354,6 +356,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
354356 case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
355357 case AMDGPU::FLAT_LOAD_DWORD:
356358 case AMDGPU::FLAT_STORE_DWORD:
359+ case AMDGPU::FLAT_LOAD_DWORD_SADDR:
360+ case AMDGPU::FLAT_STORE_DWORD_SADDR:
357361 return 1 ;
358362 case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
359363 case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
@@ -367,6 +371,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
367371 case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
368372 case AMDGPU::FLAT_LOAD_DWORDX2:
369373 case AMDGPU::FLAT_STORE_DWORDX2:
374+ case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
375+ case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
370376 return 2 ;
371377 case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
372378 case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
@@ -380,6 +386,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
380386 case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
381387 case AMDGPU::FLAT_LOAD_DWORDX3:
382388 case AMDGPU::FLAT_STORE_DWORDX3:
389+ case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
390+ case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
383391 return 3 ;
384392 case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
385393 case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
@@ -393,6 +401,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
393401 case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
394402 case AMDGPU::FLAT_LOAD_DWORDX4:
395403 case AMDGPU::FLAT_STORE_DWORDX4:
404+ case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
405+ case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
396406 return 4 ;
397407 case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
398408 case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
@@ -575,6 +585,16 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
575585 case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
576586 case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
577587 return GLOBAL_STORE_SADDR;
588+ case AMDGPU::FLAT_LOAD_DWORD_SADDR:
589+ case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
590+ case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
591+ case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
592+ return FLAT_LOAD_SADDR;
593+ case AMDGPU::FLAT_STORE_DWORD_SADDR:
594+ case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
595+ case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
596+ case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
597+ return FLAT_STORE_SADDR;
578598 }
579599}
580600
@@ -661,6 +681,16 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
661681 case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
662682 case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
663683 return AMDGPU::GLOBAL_STORE_DWORD_SADDR;
684+ case AMDGPU::FLAT_LOAD_DWORD_SADDR:
685+ case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
686+ case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
687+ case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
688+ return AMDGPU::FLAT_LOAD_DWORD_SADDR;
689+ case AMDGPU::FLAT_STORE_DWORD_SADDR:
690+ case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
691+ case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
692+ case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
693+ return AMDGPU::FLAT_STORE_DWORD_SADDR;
664694 }
665695}
666696
@@ -776,6 +806,14 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
776806 case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
777807 case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
778808 case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
809+ case AMDGPU::FLAT_LOAD_DWORD_SADDR:
810+ case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
811+ case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
812+ case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
813+ case AMDGPU::FLAT_STORE_DWORD_SADDR:
814+ case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
815+ case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
816+ case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
779817 Result.SAddr = true ;
780818 [[fallthrough]];
781819 case AMDGPU::GLOBAL_LOAD_DWORD:
@@ -1875,6 +1913,28 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
18751913 case 4 :
18761914 return AMDGPU::FLAT_STORE_DWORDX4;
18771915 }
1916+ case FLAT_LOAD_SADDR:
1917+ switch (Width) {
1918+ default :
1919+ return 0 ;
1920+ case 2 :
1921+ return AMDGPU::FLAT_LOAD_DWORDX2_SADDR;
1922+ case 3 :
1923+ return AMDGPU::FLAT_LOAD_DWORDX3_SADDR;
1924+ case 4 :
1925+ return AMDGPU::FLAT_LOAD_DWORDX4_SADDR;
1926+ }
1927+ case FLAT_STORE_SADDR:
1928+ switch (Width) {
1929+ default :
1930+ return 0 ;
1931+ case 2 :
1932+ return AMDGPU::FLAT_STORE_DWORDX2_SADDR;
1933+ case 3 :
1934+ return AMDGPU::FLAT_STORE_DWORDX3_SADDR;
1935+ case 4 :
1936+ return AMDGPU::FLAT_STORE_DWORDX4_SADDR;
1937+ }
18781938 case MIMG:
18791939 assert (((unsigned )llvm::popcount (CI.DMask | Paired.DMask ) == Width) &&
18801940 " No overlaps" );
@@ -2508,12 +2568,14 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
25082568 OptimizeListAgain |= CI.Width + Paired.Width < 4 ;
25092569 break ;
25102570 case FLAT_LOAD:
2571+ case FLAT_LOAD_SADDR:
25112572 case GLOBAL_LOAD:
25122573 case GLOBAL_LOAD_SADDR:
25132574 NewMI = mergeFlatLoadPair (CI, Paired, Where->I );
25142575 OptimizeListAgain |= CI.Width + Paired.Width < 4 ;
25152576 break ;
25162577 case FLAT_STORE:
2578+ case FLAT_STORE_SADDR:
25172579 case GLOBAL_STORE:
25182580 case GLOBAL_STORE_SADDR:
25192581 NewMI = mergeFlatStorePair (CI, Paired, Where->I );
0 commit comments