@@ -290,3 +290,209 @@ define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
290290 %gep = getelementptr inbounds i8 , ptr %base , i64 %mul
291291 ret ptr %gep
292292}
293+
294+ ; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectGlobalSAddr. The store address is the kernel-argument pointer %p plus a per-workitem byte offset (workitem id * 4) plus the constant 16. Per the autogenerated checks below, the LEGACY checks use the store form with an SGPR base (s[0:1]) and a 32-bit VGPR offset, while the PTRADD checks currently form the 64-bit address with v_lshl_add_u64 first; both keep the constant as offset:16.
295+ define amdgpu_kernel void @uniform_base_varying_offset_imm (ptr addrspace (1 ) %p ) {
296+ ; GFX942_PTRADD-LABEL: uniform_base_varying_offset_imm:
297+ ; GFX942_PTRADD: ; %bb.0: ; %entry
298+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
299+ ; GFX942_PTRADD-NEXT: v_and_b32_e32 v0, 0x3ff, v0
300+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, 0
301+ ; GFX942_PTRADD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
302+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, 1
303+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
304+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
305+ ; GFX942_PTRADD-NEXT: global_store_dword v[0:1], v2, off offset:16
306+ ; GFX942_PTRADD-NEXT: s_endpgm
307+ ;
308+ ; GFX942_LEGACY-LABEL: uniform_base_varying_offset_imm:
309+ ; GFX942_LEGACY: ; %bb.0: ; %entry
310+ ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
311+ ; GFX942_LEGACY-NEXT: v_and_b32_e32 v0, 0x3ff, v0
312+ ; GFX942_LEGACY-NEXT: v_lshlrev_b32_e32 v0, 2, v0
313+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, 1
314+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
315+ ; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[0:1] offset:16
316+ ; GFX942_LEGACY-NEXT: s_endpgm
317+ entry:
318+ %tid = call i32 @llvm.amdgcn.workitem.id.x ()
319+ %shift = shl i32 %tid , 2 ; byte offset = workitem id * 4
320+ %voffset = zext i32 %shift to i64 ; widen the 32-bit offset for the 64-bit GEP index
321+ %gep1 = getelementptr inbounds i8 , ptr addrspace (1 ) %p , i64 %voffset
322+ %gep2 = getelementptr inbounds i8 , ptr addrspace (1 ) %gep1 , i64 16 ; constant part, emitted as offset:16 in both sets of checks
323+ store i32 1 , ptr addrspace (1 ) %gep2
324+ ret void
325+ }
326+
327+ ; Adjusted from global-saddr-load.ll. Tests PTRADD handling in
328+ ; AMDGPUDAGToDAGISel::SelectSMRDBaseOffset. The load address is the kernel-argument pointer %sbase plus the zero-extended i32 kernel argument %soffset. Per the autogenerated checks below, the LEGACY checks pass the offset register (s6) directly to s_load_dword, while the PTRADD checks currently add it to the base with s_add_u32/s_addc_u32 and load with offset 0x0.
329+ define amdgpu_kernel void @global_load_saddr_i32_uniform_offset (ptr addrspace (1 ) %sbase , i32 %soffset , ptr addrspace (1 ) %r ) {
330+ ; GFX942_PTRADD-LABEL: global_load_saddr_i32_uniform_offset:
331+ ; GFX942_PTRADD: ; %bb.0:
332+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
333+ ; GFX942_PTRADD-NEXT: s_load_dword s6, s[4:5], 0x8
334+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
335+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, 0
336+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
337+ ; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, s6
338+ ; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, 0
339+ ; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0
340+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
341+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, s0
342+ ; GFX942_PTRADD-NEXT: global_store_dword v0, v1, s[2:3]
343+ ; GFX942_PTRADD-NEXT: s_endpgm
344+ ;
345+ ; GFX942_LEGACY-LABEL: global_load_saddr_i32_uniform_offset:
346+ ; GFX942_LEGACY: ; %bb.0:
347+ ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
348+ ; GFX942_LEGACY-NEXT: s_load_dword s6, s[4:5], 0x8
349+ ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
350+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, 0
351+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
352+ ; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], s6 offset:0x0
353+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
354+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, s0
355+ ; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[2:3]
356+ ; GFX942_LEGACY-NEXT: s_endpgm
357+ %zext.offset = zext i32 %soffset to i64 ; 32-bit offset widened for the 64-bit GEP index
358+ %gep0 = getelementptr inbounds i8 , ptr addrspace (1 ) %sbase , i64 %zext.offset
359+ %load = load i32 , ptr addrspace (1 ) %gep0 ; selected as s_load_dword in both sets of checks
360+ %to.vgpr = bitcast i32 %load to float
361+ store float %to.vgpr , ptr addrspace (1 ) %r ; keeps the loaded value live by writing it to %r
362+ ret void
363+ }
364+
365+ ; Adjusted from llvm.amdgcn.global.load.lds.ll, tests the offset lowering for
366+ ; Intrinsic::amdgcn_global_load_lds. The global pointer passed to the intrinsic is the inreg argument %gptr plus the zero-extended i32 argument %voffset. Per the autogenerated checks below, the LEGACY checks use the addressing form with a 32-bit VGPR offset (v1) and an SGPR base (s[0:1]), while the PTRADD checks currently build the 64-bit address with v_lshl_add_u64; both keep offset:48.
367+ define void @global_load_lds_dword_saddr_and_vaddr (ptr addrspace (1 ) nocapture inreg %gptr , ptr addrspace (3 ) nocapture %lptr , i32 %voffset ) {
368+ ; GFX942_PTRADD-LABEL: global_load_lds_dword_saddr_and_vaddr:
369+ ; GFX942_PTRADD: ; %bb.0: ; %main_body
370+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, v1
372+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v3, 0
373+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], s[0:1], 0, v[2:3]
374+ ; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0
375+ ; GFX942_PTRADD-NEXT: s_mov_b32 m0, s0
376+ ; GFX942_PTRADD-NEXT: s_nop 0
377+ ; GFX942_PTRADD-NEXT: global_load_lds_dword v[2:3], off offset:48 sc1
378+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
379+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
380+ ;
381+ ; GFX942_LEGACY-LABEL: global_load_lds_dword_saddr_and_vaddr:
382+ ; GFX942_LEGACY: ; %bb.0: ; %main_body
383+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384+ ; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s2, v0
385+ ; GFX942_LEGACY-NEXT: s_mov_b32 m0, s2
386+ ; GFX942_LEGACY-NEXT: s_nop 0
387+ ; GFX942_LEGACY-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1
388+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
389+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
390+ main_body:
391+ %voffset.64 = zext i32 %voffset to i64 ; 32-bit offset widened for the 64-bit GEP index
392+ %gep = getelementptr i8 , ptr addrspace (1 ) %gptr , i64 %voffset.64
393+ call void @llvm.amdgcn.global.load.lds (ptr addrspace (1 ) %gep , ptr addrspace (3 ) %lptr , i32 4 , i32 48 , i32 16 ) ; the 48 shows up as offset:48 in the checks; NOTE(review): 4 and 16 are presumably the access size and the aux/cache-policy bits (sc1 in the checks) - confirm against the intrinsic definition
394+ ret void
395+ }
396+
397+ ; Taken from shl_add_ptr_global.ll, tests PTRADD handling in
398+ ; SITargetLowering::performSHLPtrCombine. The address %ptr + 0x80 is converted to an integer, shifted left by 2 and used as the atomic address, and the unshifted integer has a second use in the volatile store. Per the autogenerated checks below, the LEGACY checks shift %ptr alone and fold the constant into the atomic as offset:512 (0x80 << 2), while the PTRADD checks currently shift the already-offset value and use no immediate offset.
399+ define void @shl_base_global_ptr_global_atomic_fadd (ptr addrspace (1 ) %out , ptr addrspace (1 ) %extra.use , ptr addrspace (1 ) %ptr ) {
400+ ; GFX942_PTRADD-LABEL: shl_base_global_ptr_global_atomic_fadd:
401+ ; GFX942_PTRADD: ; %bb.0:
402+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
403+ ; GFX942_PTRADD-NEXT: s_mov_b64 s[0:1], 0x80
404+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
405+ ; GFX942_PTRADD-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1]
406+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v6, 0x42c80000
407+ ; GFX942_PTRADD-NEXT: global_atomic_add_f32 v[4:5], v6, off
408+ ; GFX942_PTRADD-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
409+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0)
410+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
411+ ;
412+ ; GFX942_LEGACY-LABEL: shl_base_global_ptr_global_atomic_fadd:
413+ ; GFX942_LEGACY: ; %bb.0:
414+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415+ ; GFX942_LEGACY-NEXT: v_lshlrev_b64 v[0:1], 2, v[4:5]
416+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v6, 0x42c80000
417+ ; GFX942_LEGACY-NEXT: global_atomic_add_f32 v[0:1], v6, off offset:512
418+ ; GFX942_LEGACY-NEXT: s_mov_b64 s[0:1], 0x80
419+ ; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
420+ ; GFX942_LEGACY-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
421+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0)
422+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
423+ %arrayidx0 = getelementptr inbounds [512 x i32 ], ptr addrspace (1 ) %ptr , i64 0 , i64 32 ; element 32 of an i32 array: byte offset 32 * 4 = 0x80
424+ %cast = ptrtoint ptr addrspace (1 ) %arrayidx0 to i64
425+ %shl = shl i64 %cast , 2 ; shift applied to the whole (base + 0x80) value
426+ %castback = inttoptr i64 %shl to ptr addrspace (1 )
427+ %unused = atomicrmw fadd ptr addrspace (1 ) %castback , float 100 .0 syncscope("agent" ) monotonic , align 4 , !amdgpu.no.fine.grained.memory !0 , !amdgpu.ignore.denormal.mode !0
428+ store volatile i64 %cast , ptr addrspace (1 ) %extra.use , align 4 ; second use of the unshifted value, so base + 0x80 is still computed in both sets of checks
429+ ret void
430+ }
431+
432+ ; Test PTRADD handling in TargetLowering::SimplifyDemandedBits and
433+ ; TargetLowering::ShrinkDemandedOp. The 64-bit GEP result in addrspace(4) is cast to addrspace(6) before the load. Per the autogenerated checks below, only the low half of the sum (v0) is used for the load address and the high half is set to 0; the LEGACY checks narrow the addition to a 32-bit v_add_u32, while the PTRADD checks currently keep the 64-bit v_lshl_add_u64.
434+ define i32 @gep_in_const_as_cast_to_const32_as (ptr addrspace (4 ) %src , i64 %offset ) {
435+ ; GFX942_PTRADD-LABEL: gep_in_const_as_cast_to_const32_as:
436+ ; GFX942_PTRADD: ; %bb.0: ; %entry
437+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
439+ ; GFX942_PTRADD-NEXT: s_mov_b32 s1, 0
440+ ; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0
441+ ; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0
442+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
443+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, s0
444+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
445+ ;
446+ ; GFX942_LEGACY-LABEL: gep_in_const_as_cast_to_const32_as:
447+ ; GFX942_LEGACY: ; %bb.0: ; %entry
448+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449+ ; GFX942_LEGACY-NEXT: v_add_u32_e32 v0, v0, v2
450+ ; GFX942_LEGACY-NEXT: s_mov_b32 s1, 0
451+ ; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s0, v0
452+ ; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], 0x0
453+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
454+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, s0
455+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
456+ entry:
457+ %gep = getelementptr i8 , ptr addrspace (4 ) %src , i64 %offset ; 64-bit pointer arithmetic in addrspace(4)
458+ %gep.cast = addrspacecast ptr addrspace (4 ) %gep to ptr addrspace (6 ) ; after this cast only the low 32 bits of %gep reach the load in the checks
459+ %l = load i32 , ptr addrspace (6 ) %gep.cast
460+ ret i32 %l
461+ }
462+
463+ @CG = addrspace (4 ) constant [16 x i32 ] zeroinitializer , align 4 ; all-zero constant array in addrspace(4); memcpy source in replace_const0_memcpy_by_memset
464+
465+ ; Test PTRADD handling in isMemSrcFromConstant. The memcpy copies 8 bytes from @CG + 4, i.e. from inside a zeroinitializer constant. Per the autogenerated checks below, the LEGACY checks store zeros directly (v2 = 0, v3 = v2) without reading CG, while the PTRADD checks currently fetch the address of CG through a gotpcrel relocation and load the 8 bytes from offset 0x4 before storing them.
466+ define void @replace_const0_memcpy_by_memset (ptr align 4 %dst ) {
467+ ; GFX942_PTRADD-LABEL: replace_const0_memcpy_by_memset:
468+ ; GFX942_PTRADD: ; %bb.0: ; %entry
469+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
470+ ; GFX942_PTRADD-NEXT: s_getpc_b64 s[0:1]
471+ ; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, CG@gotpcrel32@lo+4
472+ ; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, CG@gotpcrel32@hi+12
473+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
474+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
475+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4
476+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
477+ ; GFX942_PTRADD-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
478+ ; GFX942_PTRADD-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
479+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
480+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
481+ ;
482+ ; GFX942_LEGACY-LABEL: replace_const0_memcpy_by_memset:
483+ ; GFX942_LEGACY: ; %bb.0: ; %entry
484+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
485+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v2, 0
486+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v3, v2
487+ ; GFX942_LEGACY-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
488+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
489+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
490+ entry:
491+ %gep = getelementptr i8 , ptr addrspace (4 ) @CG , i64 4 ; constant base plus constant offset: still points into the all-zero @CG
492+ tail call void @llvm.memcpy.p0.p4.i64 (ptr noundef nonnull align 4 %dst , ptr addrspace (4 ) noundef nonnull align 4 %gep , i64 8 , i1 false ) ; 8-byte copy, emitted as a single flat_store_dwordx2 in both sets of checks
493+ ret void
494+ }
495+
496+ declare void @llvm.memcpy.p0.p4.i64 (ptr noalias nocapture writeonly , ptr addrspace (4 ) noalias nocapture readonly , i64 , i1 immarg) ; memcpy intrinsic (default-address-space destination, addrspace(4) source, i64 length) called by replace_const0_memcpy_by_memset
497+
498+ !0 = !{} ; empty metadata node; operand of !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode on the atomicrmw in shl_base_global_ptr_global_atomic_fadd
0 commit comments