@@ -115,61 +115,46 @@ define amdgpu_cs void @mixed_vmem_types(i32 inreg %globalTable, i32 inreg %perSh
115115; GFX12-GISEL-NEXT: buffer_store_b32 v0, off, s[24:27], null
116116; GFX12-GISEL-NEXT: s_endpgm
117117.entry:
118- %0 = call i64 @llvm.amdgcn.s.getpc ()
119- %extelt.offset = lshr i64 %0 , 32
118+ %i = call i64 @llvm.amdgcn.s.getpc ()
119+ %extelt.offset = lshr i64 %i , 32
120120 %.i1 = trunc i64 %extelt.offset to i32
121121 %.upto0 = insertelement <2 x i32 > poison, i32 %descTable1 , i64 0
122- %1 = insertelement <2 x i32 > %.upto0 , i32 %.i1 , i64 1
123- %2 = bitcast <2 x i32 > %1 to i64
124- %3 = inttoptr i64 %2 to ptr addrspace (4 )
122+ %i1 = insertelement <2 x i32 > %.upto0 , i32 %.i1 , i64 1
123+ %i2 = bitcast <2 x i32 > %i1 to i64
124+ %i3 = inttoptr i64 %i2 to ptr addrspace (4 )
125125 %.upto03 = insertelement <2 x i32 > poison, i32 %descTable0 , i64 0
126- %4 = insertelement <2 x i32 > %.upto03 , i32 %.i1 , i64 1
127- %5 = bitcast <2 x i32 > %4 to i64
128- %6 = inttoptr i64 %5 to ptr addrspace (4 )
129- %7 = getelementptr i8 , ptr addrspace (4 ) %6 , i64 80
130- %8 = load <4 x i32 >, ptr addrspace (4 ) %7 , align 16
131- %9 = getelementptr i8 , ptr addrspace (4 ) %3 , i64 48
132- %10 = load <4 x i32 >, ptr addrspace (4 ) %9 , align 16
133- %11 = getelementptr i8 , ptr addrspace (4 ) %6 , i64 64
134- %12 = load <4 x i32 >, ptr addrspace (4 ) %11 , align 16
135- %13 = getelementptr i8 , ptr addrspace (4 ) %6 , i64 16
136- %14 = load <4 x i32 >, ptr addrspace (4 ) %13 , align 16
137- %15 = getelementptr i8 , ptr addrspace (4 ) %6 , i64 32
138- %16 = load <8 x i32 >, ptr addrspace (4 ) %15 , align 32
139- %17 = load <4 x i32 >, ptr addrspace (4 ) %6 , align 16
140- %18 = call float @llvm.amdgcn.image.sample.lz.2d.f32.f16 (i32 1 , half 0xHBC00, half 0xHBC00, <8 x i32 > %16 , <4 x i32 > %17 , i1 false , i32 0 , i32 0 )
141- %19 = fcmp oeq float %18 , 0 .000000e+00
142- %20 = call i32 @llvm.amdgcn.raw.buffer.load.i32 (<4 x i32 > %14 , i32 0 , i32 0 , i32 0 )
143- %.not = icmp eq i32 %20 , 2752
144- %21 = call i32 @llvm.amdgcn.raw.buffer.load.i32 (<4 x i32 > %12 , i32 0 , i32 0 , i32 0 )
145- %.not1 = icmp eq i32 %21 , 2752
146- %22 = getelementptr i8 , ptr addrspace (4 ) %3 , i64 16
147- %23 = load <8 x i32 >, ptr addrspace (4 ) %22 , align 32
148- %24 = load <4 x i32 >, ptr addrspace (4 ) %3 , align 16
149- %25 = call float @llvm.amdgcn.image.sample.lz.2d.f32.f16 (i32 1 , half 0xHBC00, half 0xHBC00, <8 x i32 > %23 , <4 x i32 > %24 , i1 false , i32 0 , i32 0 )
150- %26 = fcmp oeq float %25 , 1 .000000e+00
151- %27 = call i32 @llvm.amdgcn.raw.buffer.load.i32 (<4 x i32 > %10 , i32 0 , i32 0 , i32 0 )
152- %.not2 = icmp eq i32 %27 , 2752
153- %28 = select i1 %.not2 , i1 %26 , i1 false
154- %29 = select i1 %28 , i1 %.not1 , i1 false
155- %30 = select i1 %29 , i1 %.not , i1 false
156- %narrow2 = select i1 %30 , i1 %19 , i1 false
126+ %i4 = insertelement <2 x i32 > %.upto03 , i32 %.i1 , i64 1
127+ %i5 = bitcast <2 x i32 > %i4 to i64
128+ %i6 = inttoptr i64 %i5 to ptr addrspace (4 )
129+ %i7 = getelementptr i8 , ptr addrspace (4 ) %i6 , i64 80
130+ %i8 = load <4 x i32 >, ptr addrspace (4 ) %i7 , align 16
131+ %i9 = getelementptr i8 , ptr addrspace (4 ) %i3 , i64 48
132+ %i10 = load <4 x i32 >, ptr addrspace (4 ) %i9 , align 16
133+ %i11 = getelementptr i8 , ptr addrspace (4 ) %i6 , i64 64
134+ %i12 = load <4 x i32 >, ptr addrspace (4 ) %i11 , align 16
135+ %i13 = getelementptr i8 , ptr addrspace (4 ) %i6 , i64 16
136+ %i14 = load <4 x i32 >, ptr addrspace (4 ) %i13 , align 16
137+ %i15 = getelementptr i8 , ptr addrspace (4 ) %i6 , i64 32
138+ %i16 = load <8 x i32 >, ptr addrspace (4 ) %i15 , align 32
139+ %i17 = load <4 x i32 >, ptr addrspace (4 ) %i6 , align 16
140+ %i18 = call float @llvm.amdgcn.image.sample.lz.2d.f32.f16.v8i32.v4i32 (i32 1 , half 0xHBC00, half 0xHBC00, <8 x i32 > %i16 , <4 x i32 > %i17 , i1 false , i32 0 , i32 0 )
141+ %i19 = fcmp oeq float %i18 , 0 .000000e+00
142+ %i20 = call i32 @llvm.amdgcn.raw.buffer.load.i32 (<4 x i32 > %i14 , i32 0 , i32 0 , i32 0 )
143+ %.not = icmp eq i32 %i20 , 2752
144+ %i21 = call i32 @llvm.amdgcn.raw.buffer.load.i32 (<4 x i32 > %i12 , i32 0 , i32 0 , i32 0 )
145+ %.not1 = icmp eq i32 %i21 , 2752
146+ %i22 = getelementptr i8 , ptr addrspace (4 ) %i3 , i64 16
147+ %i23 = load <8 x i32 >, ptr addrspace (4 ) %i22 , align 32
148+ %i24 = load <4 x i32 >, ptr addrspace (4 ) %i3 , align 16
149+ %i25 = call float @llvm.amdgcn.image.sample.lz.2d.f32.f16.v8i32.v4i32 (i32 1 , half 0xHBC00, half 0xHBC00, <8 x i32 > %i23 , <4 x i32 > %i24 , i1 false , i32 0 , i32 0 )
150+ %i26 = fcmp oeq float %i25 , 1 .000000e+00
151+ %i27 = call i32 @llvm.amdgcn.raw.buffer.load.i32 (<4 x i32 > %i10 , i32 0 , i32 0 , i32 0 )
152+ %.not2 = icmp eq i32 %i27 , 2752
153+ %i28 = select i1 %.not2 , i1 %i26 , i1 false
154+ %i29 = select i1 %i28 , i1 %.not1 , i1 false
155+ %i30 = select i1 %i29 , i1 %.not , i1 false
156+ %narrow2 = select i1 %i30 , i1 %i19 , i1 false
157157 %.4 = zext i1 %narrow2 to i32
158- call void @llvm.amdgcn.raw.buffer.store.i32 (i32 %.4 , <4 x i32 > %8 , i32 0 , i32 0 , i32 0 )
158+ call void @llvm.amdgcn.raw.buffer.store.i32 (i32 %.4 , <4 x i32 > %i8 , i32 0 , i32 0 , i32 0 )
159159 ret void
160160}
161-
162- declare <4 x float > @llvm.amdgcn.image.sample.l.2d.v4f32.f32 (i32 immarg, float , float , float , <8 x i32 >, <4 x i32 >, i1 immarg, i32 immarg, i32 immarg) #1
163- declare ptr addrspace (7 ) @lgc.late.launder.fat.pointer (<4 x i32 >) #2
164- declare i64 @llvm.amdgcn.s.getpc () #3
165- declare <4 x float > @llvm.amdgcn.image.sample.lz.2d.v4f32.f32 (i32 immarg, float , float , <8 x i32 >, <4 x i32 >, i1 immarg, i32 immarg, i32 immarg) #1
166- declare <4 x float > @llvm.amdgcn.image.sample.lz.2d.v4f32.f16 (i32 immarg, half , half , <8 x i32 >, <4 x i32 >, i1 immarg, i32 immarg, i32 immarg) #1
167- declare float @llvm.amdgcn.image.sample.lz.2d.f32.f16 (i32 immarg, half , half , <8 x i32 >, <4 x i32 >, i1 immarg, i32 immarg, i32 immarg) #1
168- declare i32 @llvm.amdgcn.raw.buffer.load.i32 (<4 x i32 >, i32 , i32 , i32 immarg) #1
169- declare void @llvm.amdgcn.raw.buffer.store.i32 (i32 , <4 x i32 >, i32 , i32 , i32 immarg) #4
170-
171- attributes #0 = { nounwind }
172- attributes #1 = { nounwind willreturn memory(read) }
173- attributes #2 = { nounwind memory(none) }
174- attributes #3 = { nounwind speculatable willreturn memory(none) }
175- attributes #4 = { nounwind willreturn memory(write) }
0 commit comments