@@ -87,6 +87,8 @@ enum InstClassEnum {
87
87
GLOBAL_STORE_SADDR,
88
88
FLAT_LOAD,
89
89
FLAT_STORE,
90
+ FLAT_LOAD_SADDR,
91
+ FLAT_STORE_SADDR,
90
92
GLOBAL_LOAD, // GLOBAL_LOAD/GLOBAL_STORE are never used as the InstClass of
91
93
GLOBAL_STORE // any CombineInfo, they are only ever returned by
92
94
// getCommonInstClass.
@@ -354,6 +356,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
354
356
case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
355
357
case AMDGPU::FLAT_LOAD_DWORD:
356
358
case AMDGPU::FLAT_STORE_DWORD:
359
+ case AMDGPU::FLAT_LOAD_DWORD_SADDR:
360
+ case AMDGPU::FLAT_STORE_DWORD_SADDR:
357
361
return 1 ;
358
362
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
359
363
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
@@ -367,6 +371,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
367
371
case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
368
372
case AMDGPU::FLAT_LOAD_DWORDX2:
369
373
case AMDGPU::FLAT_STORE_DWORDX2:
374
+ case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
375
+ case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
370
376
return 2 ;
371
377
case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
372
378
case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
@@ -380,6 +386,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
380
386
case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
381
387
case AMDGPU::FLAT_LOAD_DWORDX3:
382
388
case AMDGPU::FLAT_STORE_DWORDX3:
389
+ case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
390
+ case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
383
391
return 3 ;
384
392
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
385
393
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
@@ -393,6 +401,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
393
401
case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
394
402
case AMDGPU::FLAT_LOAD_DWORDX4:
395
403
case AMDGPU::FLAT_STORE_DWORDX4:
404
+ case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
405
+ case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
396
406
return 4 ;
397
407
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
398
408
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
@@ -575,6 +585,16 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
575
585
case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
576
586
case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
577
587
return GLOBAL_STORE_SADDR;
588
+ case AMDGPU::FLAT_LOAD_DWORD_SADDR:
589
+ case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
590
+ case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
591
+ case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
592
+ return FLAT_LOAD_SADDR;
593
+ case AMDGPU::FLAT_STORE_DWORD_SADDR:
594
+ case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
595
+ case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
596
+ case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
597
+ return FLAT_STORE_SADDR;
578
598
}
579
599
}
580
600
@@ -661,6 +681,16 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
661
681
case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
662
682
case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
663
683
return AMDGPU::GLOBAL_STORE_DWORD_SADDR;
684
+ case AMDGPU::FLAT_LOAD_DWORD_SADDR:
685
+ case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
686
+ case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
687
+ case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
688
+ return AMDGPU::FLAT_LOAD_DWORD_SADDR;
689
+ case AMDGPU::FLAT_STORE_DWORD_SADDR:
690
+ case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
691
+ case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
692
+ case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
693
+ return AMDGPU::FLAT_STORE_DWORD_SADDR;
664
694
}
665
695
}
666
696
@@ -776,6 +806,14 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
776
806
case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
777
807
case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
778
808
case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
809
+ case AMDGPU::FLAT_LOAD_DWORD_SADDR:
810
+ case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
811
+ case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
812
+ case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
813
+ case AMDGPU::FLAT_STORE_DWORD_SADDR:
814
+ case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
815
+ case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
816
+ case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
779
817
Result.SAddr = true ;
780
818
[[fallthrough]];
781
819
case AMDGPU::GLOBAL_LOAD_DWORD:
@@ -1875,6 +1913,28 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
1875
1913
case 4 :
1876
1914
return AMDGPU::FLAT_STORE_DWORDX4;
1877
1915
}
1916
+ case FLAT_LOAD_SADDR:
1917
+ switch (Width) {
1918
+ default :
1919
+ return 0 ;
1920
+ case 2 :
1921
+ return AMDGPU::FLAT_LOAD_DWORDX2_SADDR;
1922
+ case 3 :
1923
+ return AMDGPU::FLAT_LOAD_DWORDX3_SADDR;
1924
+ case 4 :
1925
+ return AMDGPU::FLAT_LOAD_DWORDX4_SADDR;
1926
+ }
1927
+ case FLAT_STORE_SADDR:
1928
+ switch (Width) {
1929
+ default :
1930
+ return 0 ;
1931
+ case 2 :
1932
+ return AMDGPU::FLAT_STORE_DWORDX2_SADDR;
1933
+ case 3 :
1934
+ return AMDGPU::FLAT_STORE_DWORDX3_SADDR;
1935
+ case 4 :
1936
+ return AMDGPU::FLAT_STORE_DWORDX4_SADDR;
1937
+ }
1878
1938
case MIMG:
1879
1939
assert (((unsigned )llvm::popcount (CI.DMask | Paired.DMask ) == Width) &&
1880
1940
" No overlaps" );
@@ -2508,12 +2568,14 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
2508
2568
OptimizeListAgain |= CI.Width + Paired.Width < 4 ;
2509
2569
break ;
2510
2570
case FLAT_LOAD:
2571
+ case FLAT_LOAD_SADDR:
2511
2572
case GLOBAL_LOAD:
2512
2573
case GLOBAL_LOAD_SADDR:
2513
2574
NewMI = mergeFlatLoadPair (CI, Paired, Where->I );
2514
2575
OptimizeListAgain |= CI.Width + Paired.Width < 4 ;
2515
2576
break ;
2516
2577
case FLAT_STORE:
2578
+ case FLAT_STORE_SADDR:
2517
2579
case GLOBAL_STORE:
2518
2580
case GLOBAL_STORE_SADDR:
2519
2581
NewMI = mergeFlatStorePair (CI, Paired, Where->I );
0 commit comments