Skip to content

Commit 5eea98c

Browse files
committed
remove and add
1 parent ed14825 commit 5eea98c

File tree

3 files changed

+50
-6
lines changed

3 files changed

+50
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -747,7 +747,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
747747

748748
getActionDefinitionsBuilder(G_ADD)
749749
.legalFor(ST.hasLshlAddB64()
750-
? std::initializer_list<LLT>{S32, S16, V2S16, S64}
750+
? std::initializer_list<LLT>{S64, S32, S16, V2S16}
751751
: std::initializer_list<LLT>{S32, S16, V2S16})
752752
.clampMaxNumElementsStrict(0, S16, 2)
753753
.scalarize(0)

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -774,11 +774,6 @@ def : GCNPat<
774774
(V_LSHL_ADD_U64_e64 VSrc_b64:$src1, VSrc_b32:$shift, VSrc_b64:$src0)
775775
>;
776776

777-
def : GCNPat<
778-
(ptradd i64:$src0, i64:$src1),
779-
(V_LSHL_ADD_U64_e64 VSrc_b64:$src0, (i32 0), VSrc_b64:$src1)
780-
>;
781-
782777
def : GCNPat<
783778
(ThreeOpFrag<shl_0_to_4, add> i64:$src0, i32:$src1, i64:$src2),
784779
(V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)

llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,3 +207,52 @@ define i32 @lshl_add_u64_gep(ptr %p, i64 %a) {
207207
%v = load i32, ptr %gep
208208
ret i32 %v
209209
}
210+
211+
@arr = global [10 x [10 x i64]] zeroinitializer
212+
define i64 @lshl_add_u64_gep_shift(i64 %row, i64 %col) {
213+
; GCN-LABEL: lshl_add_u64_gep_shift:
214+
; GCN: ; %bb.0: ; %entry
215+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216+
; GCN-NEXT: s_getpc_b64 s[0:1]
217+
; GCN-NEXT: s_add_u32 s0, s0, arr@gotpcrel32@lo+4
218+
; GCN-NEXT: s_addc_u32 s1, s1, arr@gotpcrel32@hi+12
219+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
220+
; GCN-NEXT: s_movk_i32 s2, 0x50
221+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
222+
; GCN-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
223+
; GCN-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s2, v[4:5]
224+
; GCN-NEXT: v_mov_b32_e32 v0, v5
225+
; GCN-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v1, s2, v[0:1]
226+
; GCN-NEXT: v_mov_b32_e32 v5, v0
227+
; GCN-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[4:5]
228+
; GCN-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
229+
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
230+
; GCN-NEXT: s_setpc_b64 s[30:31]
231+
;
232+
; GI-LABEL: lshl_add_u64_gep_shift:
233+
; GI: ; %bb.0: ; %entry
234+
; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
235+
; GI-NEXT: s_getpc_b64 s[0:1]
236+
; GI-NEXT: s_add_u32 s0, s0, arr@gotpcrel32@lo+4
237+
; GI-NEXT: s_addc_u32 s1, s1, arr@gotpcrel32@hi+12
238+
; GI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
239+
; GI-NEXT: v_mov_b32_e32 v6, 0x50
240+
; GI-NEXT: v_mad_u64_u32 v[4:5], s[2:3], v0, v6, 0
241+
; GI-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v1, v6, 0
242+
; GI-NEXT: v_add_u32_e32 v5, v5, v0
243+
; GI-NEXT: s_waitcnt lgkmcnt(0)
244+
; GI-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
245+
; GI-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4
246+
; GI-NEXT: s_nop 1
247+
; GI-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v5, vcc
248+
; GI-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
249+
; GI-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
250+
; GI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
251+
; GI-NEXT: s_setpc_b64 s[30:31]
252+
entry:
253+
%base = getelementptr [10 x [10 x i64]], ptr @arr, i64 0, i64 %row, i64 0
254+
%shifted_col = shl i64 %col, 2 ; scale col by 4 (shift left by 2) to form a byte offset; note sizeof(i64) is 8, not 4
255+
%ptr = getelementptr i8, ptr %base, i64 %shifted_col
256+
%val = load i64, ptr %ptr
257+
ret i64 %val
258+
}

0 commit comments

Comments
 (0)