Skip to content

Commit ad0acf4

Browse files
AMDGPU/GlobalISel: Combine S16 copy-trunc-readanylane-anyext (#168410)
1 parent 1580f4b commit ad0acf4

File tree

3 files changed

+8
-13
lines changed

3 files changed

+8
-13
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,14 @@ Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) {
173173
if (mi_match(Src, MRI, m_GAMDGPUReadAnyLane(m_Reg(RALSrc))))
174174
return RALSrc;
175175

176+
// Aext = G_ANYEXT RALSrc
177+
// TruncSrc = G_AMDGPU_READANYLANE Aext
178+
// Src = G_TRUNC TruncSrc
179+
if (mi_match(Src, MRI,
180+
m_GTrunc(m_GAMDGPUReadAnyLane(m_GAnyExt(m_Reg(RALSrc)))))) {
181+
return RALSrc;
182+
}
183+
176184
// TruncSrc = G_AMDGPU_READANYLANE RALSrc
177185
// AextSrc = G_TRUNC TruncSrc
178186
// Src = G_ANYEXT AextSrc

llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,9 +1070,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
10701070
; GFX11-True16-NEXT: v_mov_b32_e32 v1, s0
10711071
; GFX11-True16-NEXT: ds_load_u16_d16 v1, v1
10721072
; GFX11-True16-NEXT: s_waitcnt lgkmcnt(0)
1073-
; GFX11-True16-NEXT: v_readfirstlane_b32 s0, v1
1074-
; GFX11-True16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1075-
; GFX11-True16-NEXT: v_mov_b16_e32 v1.l, s0
10761073
; GFX11-True16-NEXT: ds_store_b16 v0, v1
10771074
; GFX11-True16-NEXT: s_endpgm
10781075
;
@@ -1089,10 +1086,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
10891086
; GFX12-True16-NEXT: v_mov_b32_e32 v1, s0
10901087
; GFX12-True16-NEXT: ds_load_u16_d16 v1, v1
10911088
; GFX12-True16-NEXT: s_wait_dscnt 0x0
1092-
; GFX12-True16-NEXT: v_readfirstlane_b32 s0, v1
1093-
; GFX12-True16-NEXT: s_wait_alu 0xf1ff
1094-
; GFX12-True16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1095-
; GFX12-True16-NEXT: v_mov_b16_e32 v1.l, s0
10961089
; GFX12-True16-NEXT: ds_store_b16 v0, v1
10971090
; GFX12-True16-NEXT: s_endpgm
10981091
;

llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,6 @@ define amdgpu_ps void @load_uniform_P1_i16_gfx12(ptr addrspace(1) inreg %ptra, p
1313
; GFX11-NEXT: v_mov_b32_e32 v2, 0
1414
; GFX11-NEXT: global_load_d16_b16 v2, v2, s[0:1]
1515
; GFX11-NEXT: s_waitcnt vmcnt(0)
16-
; GFX11-NEXT: v_readfirstlane_b32 s0, v2
17-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
18-
; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
1916
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
2017
; GFX11-NEXT: s_endpgm
2118
;
@@ -312,9 +309,6 @@ define amdgpu_ps void @load_uniform_P4_i16_gfx12(ptr addrspace(4) inreg %ptra, p
312309
; GFX11-NEXT: v_mov_b32_e32 v2, 0
313310
; GFX11-NEXT: global_load_d16_b16 v2, v2, s[0:1]
314311
; GFX11-NEXT: s_waitcnt vmcnt(0)
315-
; GFX11-NEXT: v_readfirstlane_b32 s0, v2
316-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
317-
; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
318312
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
319313
; GFX11-NEXT: s_endpgm
320314
;

0 commit comments

Comments
 (0)