; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefix=CHECK1 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -amdgpu-si-fold-operands-preheader-threshold=10 < %s | FileCheck --check-prefix=CHECK2 %s

; Test of v_readfirstlane_b32 emission for a kernel whose loop body applies
; llvm.amdgcn.readfirstlane to values that are defined in the loop preheader.
;
; The kernel is compiled twice:
;   * with default options (checked under the CHECK1 prefix), where exactly one
;     v_readfirstlane_b32 is expected to remain before s_endpgm;
;   * with -amdgpu-si-fold-operands-preheader-threshold=10 (checked under the
;     CHECK2 prefix), where at least three v_readfirstlane_b32 are expected to
;     remain before s_endpgm.
; NOTE(review): the exact meaning of the threshold value is not visible in this
; file; confirm against the option's definition in the AMDGPU backend.

define protected amdgpu_kernel void @main(ptr addrspace(1) noundef %args.coerce, ptr addrspace(1) noundef %args.coerce2, ptr addrspace(1) noundef %args.coerce4, i32 noundef %args12) {
; CHECK1-LABEL: main:
; Check that non-redundant readfirstlanes are not removed.
; CHECK1: v_readfirstlane_b32
; Check that all redundant readfirstlanes are removed.
; CHECK1-NOT: v_readfirstlane_b32
; CHECK1: s_endpgm

; CHECK2-LABEL: main:
; CHECK2: v_readfirstlane_b32
; Check that all redundant readfirstlanes across basic blocks persist.
; CHECK2: v_readfirstlane_b32
; CHECK2: v_readfirstlane_b32
; CHECK2: s_endpgm
entry:
  ; %rfl1 is computed from the workitem id, %rfl2 from the kernel argument;
  ; %rfl2 is the loop trip count and also guards entry to the loop.
  %wid = tail call noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
  %div1 = lshr i32 %wid, 6
  %rfl1 = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 %div1)
  %sub1 = add nsw i32 %args12, 1023
  %div2 = sdiv i32 %sub1, 1024
  %rfl2 = tail call i32 @llvm.amdgcn.readfirstlane.i32(i32 %div2)
  %cmp24.i = icmp sgt i32 %rfl2, 0
  br i1 %cmp24.i, label %for.body.lr.ph.i, label %add.exit

for.body.lr.ph.i:                                 ; preds = %entry
  ; Loop preheader: everything the in-loop readfirstlanes consume (%ee1, %ee2,
  ; %mul4) is defined here, outside the loop.
  %pti1 = ptrtoint ptr addrspace(1) %args.coerce4 to i64
  %pti2 = ptrtoint ptr addrspace(1) %args.coerce2 to i64
  %pti3 = ptrtoint ptr addrspace(1) %args.coerce to i64
  %lshr1 = lshr i32 %rfl1, 2
  %mbl = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
  %mbh = tail call noundef i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbl)
  %lshr2 = lshr i32 %mbh, 6
  %add8 = add i32 %lshr1, %lshr2
  %sub3 = shl i32 %rfl1, 8
  %mul2 = and i32 %sub3, 768
  %add1 = or disjoint i32 %mbh, %mul2
  %add3 = add nsw i32 %add1, %add8
  ; The chain below feeding %mul4 is built from constants only (reduced input).
  %sext1 = add i64 4294967296, 4611686014132420608
  %conv1 = lshr exact i64 64, 32
  %add4 = add nuw nsw i64 %conv1, 1
  %zext2 = zext i32 1 to i64
  %tmp.sroa = add nuw nsw i64 %zext2, 4294967295
  %sub5 = add i64 %tmp.sroa, 4294967296
  %sext2 = mul i64 %sub5, 4294967296
  %conv2 = lshr exact i64 %sext2, 32
  %add5 = add nuw nsw i64 %add4, %conv2
  %conv3 = trunc i64 %add5 to i32
  %mul4 = shl i32 %conv3, 2
  ; Split the first pointer argument into its two 32-bit halves.
  %bc1 = bitcast i64 %pti3 to <2 x i32>
  %ee1 = extractelement <2 x i32> %bc1, i64 0
  %ee2 = extractelement <2 x i32> %bc1, i64 1
  br label %for.body.i

for.body.i:                                       ; preds = %for.body.i, %for.body.lr.ph.i
  %loopi = phi i32 [ 0, %for.body.lr.ph.i ], [ %inc.i, %for.body.i ]
  %tmp1 = phi i32 [ %add3, %for.body.lr.ph.i ], [ %cnt, %for.body.i ]
  ; These readfirstlanes take operands defined in the preheader; they are the
  ; cross-basic-block readfirstlanes this test is concerned with.
  %rfl3 = tail call i32 @llvm.amdgcn.readfirstlane.i32(i32 %ee1)
  %rfl4 = tail call i32 @llvm.amdgcn.readfirstlane.i32(i32 %ee2)
  %rfl5 = tail call i32 @llvm.amdgcn.readfirstlane.i32(i32 %mul4)
  ; Assemble the <4 x i32> resource operand used by the buffer intrinsics.
  %ie1 = insertelement <4 x i32> <i32 poison, i32 poison, i32 poison, i32 131072>, i32 %rfl3, i64 0
  %ie2 = insertelement <4 x i32> %ie1, i32 %rfl4, i64 1
  %ie3 = insertelement <4 x i32> %ie2, i32 %rfl5, i64 2
  %mul5 = shl i32 %tmp1, 2
  %buffload1 = tail call contract noundef <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> noundef %ie2, i32 noundef %mul5, i32 noundef 0, i32 noundef 0) #6
  %add6 = add nsw i32 %tmp1, 1
  %buffload3 = tail call contract noundef <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> noundef %ie3, i32 noundef %mul5, i32 noundef 0, i32 noundef 0) #6
  %vec_add1 = fadd contract <4 x float> %buffload1, %buffload3
  tail call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> noundef %vec_add1, <4 x i32> noundef %ie3, i32 noundef %mul5, i32 noundef 0, i32 noundef 0) #6
  %cnt = add nsw i32 %tmp1, 1024
  %inc.i = add nuw nsw i32 %loopi, 1
  %exitcond.not.i = icmp eq i32 %inc.i, %rfl2
  br i1 %exitcond.not.i, label %add.exit, label %for.body.i

add.exit:                                         ; preds = %for.body.i, %entry
  ret void
}