Skip to content

Commit 668e649

Browse files
authored
[AMDGPU] Support merging of flat GVS ops (#154200)
1 parent 13dd650 commit 668e649

File tree

2 files changed

+400
-0
lines changed

2 files changed

+400
-0
lines changed

llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp

Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,8 @@ enum InstClassEnum {
   GLOBAL_STORE_SADDR,
   FLAT_LOAD,
   FLAT_STORE,
+  FLAT_LOAD_SADDR,
+  FLAT_STORE_SADDR,
   GLOBAL_LOAD, // GLOBAL_LOAD/GLOBAL_STORE are never used as the InstClass of
   GLOBAL_STORE // any CombineInfo, they are only ever returned by
                // getCommonInstClass.
@@ -354,6 +356,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
   case AMDGPU::FLAT_LOAD_DWORD:
   case AMDGPU::FLAT_STORE_DWORD:
+  case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+  case AMDGPU::FLAT_STORE_DWORD_SADDR:
     return 1;
   case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
@@ -367,6 +371,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
   case AMDGPU::FLAT_LOAD_DWORDX2:
   case AMDGPU::FLAT_STORE_DWORDX2:
+  case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
     return 2;
   case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
@@ -380,6 +386,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
   case AMDGPU::FLAT_LOAD_DWORDX3:
   case AMDGPU::FLAT_STORE_DWORDX3:
+  case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
     return 3;
   case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
@@ -393,6 +401,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
   case AMDGPU::FLAT_LOAD_DWORDX4:
   case AMDGPU::FLAT_STORE_DWORDX4:
+  case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
     return 4;
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
@@ -575,6 +585,16 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
   case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
     return GLOBAL_STORE_SADDR;
+  case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+    return FLAT_LOAD_SADDR;
+  case AMDGPU::FLAT_STORE_DWORD_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
+    return FLAT_STORE_SADDR;
   }
 }
 
@@ -661,6 +681,16 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
   case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
     return AMDGPU::GLOBAL_STORE_DWORD_SADDR;
+  case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+    return AMDGPU::FLAT_LOAD_DWORD_SADDR;
+  case AMDGPU::FLAT_STORE_DWORD_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
+    return AMDGPU::FLAT_STORE_DWORD_SADDR;
   }
 }
 
@@ -776,6 +806,14 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
   case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
   case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+  case AMDGPU::FLAT_STORE_DWORD_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
     Result.SAddr = true;
     [[fallthrough]];
   case AMDGPU::GLOBAL_LOAD_DWORD:
@@ -1875,6 +1913,28 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
     case 4:
       return AMDGPU::FLAT_STORE_DWORDX4;
     }
+  case FLAT_LOAD_SADDR:
+    switch (Width) {
+    default:
+      return 0;
+    case 2:
+      return AMDGPU::FLAT_LOAD_DWORDX2_SADDR;
+    case 3:
+      return AMDGPU::FLAT_LOAD_DWORDX3_SADDR;
+    case 4:
+      return AMDGPU::FLAT_LOAD_DWORDX4_SADDR;
+    }
+  case FLAT_STORE_SADDR:
+    switch (Width) {
+    default:
+      return 0;
+    case 2:
+      return AMDGPU::FLAT_STORE_DWORDX2_SADDR;
+    case 3:
+      return AMDGPU::FLAT_STORE_DWORDX3_SADDR;
+    case 4:
+      return AMDGPU::FLAT_STORE_DWORDX4_SADDR;
+    }
   case MIMG:
     assert(((unsigned)llvm::popcount(CI.DMask | Paired.DMask) == Width) &&
            "No overlaps");
@@ -2508,12 +2568,14 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
       OptimizeListAgain |= CI.Width + Paired.Width < 4;
       break;
     case FLAT_LOAD:
+    case FLAT_LOAD_SADDR:
     case GLOBAL_LOAD:
     case GLOBAL_LOAD_SADDR:
       NewMI = mergeFlatLoadPair(CI, Paired, Where->I);
       OptimizeListAgain |= CI.Width + Paired.Width < 4;
       break;
     case FLAT_STORE:
+    case FLAT_STORE_SADDR:
     case GLOBAL_STORE:
     case GLOBAL_STORE_SADDR:
       NewMI = mergeFlatStorePair(CI, Paired, Where->I);

0 commit comments

Comments
 (0)