@@ -223,28 +223,116 @@ define void @merge_i32_2i16_float_4i8(ptr addrspace(1) %ptr1, ptr addrspace(2) %
223223 ret void
224224}
225225
; merge_fp_v2half_type: a float access at element 0 and a <2 x half> access at
; <2 x half> index 1, loaded from %ptr1 (addrspace 1) and stored to %ptr2
; (addrspace 2). The "-" lines below are the pre-rename @merge_fp_type version.
; Expected output under both check prefixes: a single <2 x float> load whose
; lane 1 is bitcast back to <2 x half>, and a single <2 x i32> store built from
; both values bitcast to i32. Only the RELAXED define line expects #[[ATTR1]].
226- define void @merge_fp_type (ptr addrspace (1 ) %ptr1 , ptr addrspace (2 ) %ptr2 ) {
227- ; CHECK-OOB-RELAXED-LABEL: define void @merge_fp_type (
226+ define void @merge_fp_v2half_type (ptr addrspace (1 ) %ptr1 , ptr addrspace (2 ) %ptr2 ) {
227+ ; CHECK-OOB-RELAXED-LABEL: define void @merge_fp_v2half_type (
228228; CHECK-OOB-RELAXED-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) #[[ATTR1]] {
229229; CHECK-OOB-RELAXED-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[PTR1]], i64 0
230230; CHECK-OOB-RELAXED-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[GEP1]], align 4
231231; CHECK-OOB-RELAXED-NEXT: [[LOAD11:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
232232; CHECK-OOB-RELAXED-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
233233; CHECK-OOB-RELAXED-NEXT: [[DOTCAST:%.*]] = bitcast float [[TMP2]] to <2 x half>
234+ ; CHECK-OOB-RELAXED-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
235+ ; CHECK-OOB-RELAXED-NEXT: [[DOTCAST_CAST:%.*]] = bitcast <2 x half> [[DOTCAST]] to i32
236+ ; CHECK-OOB-RELAXED-NEXT: [[TMP3:%.*]] = bitcast float [[LOAD11]] to i32
237+ ; CHECK-OOB-RELAXED-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 0
238+ ; CHECK-OOB-RELAXED-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[DOTCAST_CAST]], i32 1
239+ ; CHECK-OOB-RELAXED-NEXT: store <2 x i32> [[TMP5]], ptr addrspace(2) [[STORE_GEP1]], align 4
234240; CHECK-OOB-RELAXED-NEXT: ret void
235241;
236- ; CHECK-OOB-STRICT-LABEL: define void @merge_fp_type (
242+ ; CHECK-OOB-STRICT-LABEL: define void @merge_fp_v2half_type (
237243; CHECK-OOB-STRICT-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) {
238244; CHECK-OOB-STRICT-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[PTR1]], i64 0
239245; CHECK-OOB-STRICT-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[GEP1]], align 4
240246; CHECK-OOB-STRICT-NEXT: [[LOAD11:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
241247; CHECK-OOB-STRICT-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
242248; CHECK-OOB-STRICT-NEXT: [[DOTCAST:%.*]] = bitcast float [[TMP2]] to <2 x half>
249+ ; CHECK-OOB-STRICT-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
250+ ; CHECK-OOB-STRICT-NEXT: [[DOTCAST_CAST:%.*]] = bitcast <2 x half> [[DOTCAST]] to i32
251+ ; CHECK-OOB-STRICT-NEXT: [[TMP3:%.*]] = bitcast float [[LOAD11]] to i32
252+ ; CHECK-OOB-STRICT-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 0
253+ ; CHECK-OOB-STRICT-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[DOTCAST_CAST]], i32 1
254+ ; CHECK-OOB-STRICT-NEXT: store <2 x i32> [[TMP5]], ptr addrspace(2) [[STORE_GEP1]], align 4
243255; CHECK-OOB-STRICT-NEXT: ret void
244256;
245257 %gep1 = getelementptr inbounds float , ptr addrspace (1 ) %ptr1 , i64 0
246258 %load1 = load float , ptr addrspace (1 ) %gep1 , align 4
247259 %gep2 = getelementptr inbounds <2 x half >, ptr addrspace (1 ) %ptr1 , i64 1
248260 %load2 = load <2 x half >, ptr addrspace (1 ) %gep2 , align 4
261+ %store.gep1 = getelementptr inbounds i32 , ptr addrspace (2 ) %ptr2 , i64 0
262+ store float %load1 , ptr addrspace (2 ) %store.gep1 , align 4
263+ %store.gep2 = getelementptr inbounds <2 x half >, ptr addrspace (2 ) %ptr2 , i64 1
264+ store <2 x half > %load2 , ptr addrspace (2 ) %store.gep2 , align 4
265+ ret void
266+ }
267+
; merge_v2half_bfloat_type: a bfloat access at element 0 and a <2 x half> access
; at <2 x half> index 1, loaded from %ptr1 (addrspace 1) and stored to %ptr2
; (addrspace 2).
; The expected output for both check prefixes repeats the input instructions
; unchanged: two separate loads and two separate stores, nothing vectorized.
; NOTE(review): the "merge_" name suggests vectorization, but the checks pin the
; unmerged form; presumably the bfloat does not fill the 4 bytes that precede
; the <2 x half> -- confirm, and consider a "no_merge_" prefix for clarity.
268+ define void @merge_v2half_bfloat_type (ptr addrspace (1 ) %ptr1 , ptr addrspace (2 ) %ptr2 ) {
269+ ; CHECK-OOB-RELAXED-LABEL: define void @merge_v2half_bfloat_type(
270+ ; CHECK-OOB-RELAXED-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) #[[ATTR1]] {
271+ ; CHECK-OOB-RELAXED-NEXT: [[GEP1:%.*]] = getelementptr inbounds bfloat, ptr addrspace(1) [[PTR1]], i64 0
272+ ; CHECK-OOB-RELAXED-NEXT: [[LOAD1:%.*]] = load bfloat, ptr addrspace(1) [[GEP1]], align 4
273+ ; CHECK-OOB-RELAXED-NEXT: [[GEP2:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(1) [[PTR1]], i64 1
274+ ; CHECK-OOB-RELAXED-NEXT: [[LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[GEP2]], align 4
275+ ; CHECK-OOB-RELAXED-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
276+ ; CHECK-OOB-RELAXED-NEXT: store bfloat [[LOAD1]], ptr addrspace(2) [[STORE_GEP1]], align 4
277+ ; CHECK-OOB-RELAXED-NEXT: [[STORE_GEP2:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(2) [[PTR2]], i64 1
278+ ; CHECK-OOB-RELAXED-NEXT: store <2 x half> [[LOAD2]], ptr addrspace(2) [[STORE_GEP2]], align 4
279+ ; CHECK-OOB-RELAXED-NEXT: ret void
280+ ;
281+ ; CHECK-OOB-STRICT-LABEL: define void @merge_v2half_bfloat_type(
282+ ; CHECK-OOB-STRICT-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) {
283+ ; CHECK-OOB-STRICT-NEXT: [[GEP1:%.*]] = getelementptr inbounds bfloat, ptr addrspace(1) [[PTR1]], i64 0
284+ ; CHECK-OOB-STRICT-NEXT: [[LOAD1:%.*]] = load bfloat, ptr addrspace(1) [[GEP1]], align 4
285+ ; CHECK-OOB-STRICT-NEXT: [[GEP2:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(1) [[PTR1]], i64 1
286+ ; CHECK-OOB-STRICT-NEXT: [[LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[GEP2]], align 4
287+ ; CHECK-OOB-STRICT-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
288+ ; CHECK-OOB-STRICT-NEXT: store bfloat [[LOAD1]], ptr addrspace(2) [[STORE_GEP1]], align 4
289+ ; CHECK-OOB-STRICT-NEXT: [[STORE_GEP2:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(2) [[PTR2]], i64 1
290+ ; CHECK-OOB-STRICT-NEXT: store <2 x half> [[LOAD2]], ptr addrspace(2) [[STORE_GEP2]], align 4
291+ ; CHECK-OOB-STRICT-NEXT: ret void
292+ ;
293+ %gep1 = getelementptr inbounds bfloat, ptr addrspace (1 ) %ptr1 , i64 0
294+ %load1 = load bfloat, ptr addrspace (1 ) %gep1 , align 4
295+ %gep2 = getelementptr inbounds <2 x half >, ptr addrspace (1 ) %ptr1 , i64 1
296+ %load2 = load <2 x half >, ptr addrspace (1 ) %gep2 , align 4
297+ %store.gep1 = getelementptr inbounds i32 , ptr addrspace (2 ) %ptr2 , i64 0
298+ store bfloat %load1 , ptr addrspace (2 ) %store.gep1 , align 4
299+ %store.gep2 = getelementptr inbounds <2 x half >, ptr addrspace (2 ) %ptr2 , i64 1
300+ store <2 x half > %load2 , ptr addrspace (2 ) %store.gep2 , align 4
301+ ret void
302+ }
303+
; no_merge_mixed_ptr_addrspaces: negative test. Loads a `ptr addrspace(1)` value
; at element 0 and a `ptr addrspace(2)` value at element 1 from %ptr1, then
; stores both to %ptr2. The expected output for both check prefixes repeats the
; input instructions unchanged, i.e. the pointer accesses with differing address
; spaces are not combined into a vector access.
; The CHECK-*-LABEL lines must spell the function name exactly as the `define`
; line does; FileCheck matches the label text literally against the output.
304+ define void @no_merge_mixed_ptr_addrspaces (ptr addrspace (1 ) %ptr1 , ptr addrspace (2 ) %ptr2 ) {
305+ ; CHECK-OOB-RELAXED-LABEL: define void @no_merge_mixed_ptr_addrspaces(
306+ ; CHECK-OOB-RELAXED-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) #[[ATTR1]] {
307+ ; CHECK-OOB-RELAXED-NEXT: [[GEP1:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[PTR1]], i64 0
308+ ; CHECK-OOB-RELAXED-NEXT: [[LOAD1:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[GEP1]], align 4
309+ ; CHECK-OOB-RELAXED-NEXT: [[GEP2:%.*]] = getelementptr inbounds ptr addrspace(2), ptr addrspace(1) [[PTR1]], i64 1
310+ ; CHECK-OOB-RELAXED-NEXT: [[LOAD2:%.*]] = load ptr addrspace(2), ptr addrspace(1) [[GEP2]], align 4
311+ ; CHECK-OOB-RELAXED-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
312+ ; CHECK-OOB-RELAXED-NEXT: store ptr addrspace(1) [[LOAD1]], ptr addrspace(2) [[STORE_GEP1]], align 4
313+ ; CHECK-OOB-RELAXED-NEXT: [[STORE_GEP2:%.*]] = getelementptr inbounds ptr addrspace(2), ptr addrspace(2) [[PTR2]], i64 1
314+ ; CHECK-OOB-RELAXED-NEXT: store ptr addrspace(2) [[LOAD2]], ptr addrspace(2) [[STORE_GEP2]], align 4
315+ ; CHECK-OOB-RELAXED-NEXT: ret void
316+ ;
317+ ; CHECK-OOB-STRICT-LABEL: define void @no_merge_mixed_ptr_addrspaces(
318+ ; CHECK-OOB-STRICT-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) {
319+ ; CHECK-OOB-STRICT-NEXT: [[GEP1:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[PTR1]], i64 0
320+ ; CHECK-OOB-STRICT-NEXT: [[LOAD1:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[GEP1]], align 4
321+ ; CHECK-OOB-STRICT-NEXT: [[GEP2:%.*]] = getelementptr inbounds ptr addrspace(2), ptr addrspace(1) [[PTR1]], i64 1
322+ ; CHECK-OOB-STRICT-NEXT: [[LOAD2:%.*]] = load ptr addrspace(2), ptr addrspace(1) [[GEP2]], align 4
323+ ; CHECK-OOB-STRICT-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
324+ ; CHECK-OOB-STRICT-NEXT: store ptr addrspace(1) [[LOAD1]], ptr addrspace(2) [[STORE_GEP1]], align 4
325+ ; CHECK-OOB-STRICT-NEXT: [[STORE_GEP2:%.*]] = getelementptr inbounds ptr addrspace(2), ptr addrspace(2) [[PTR2]], i64 1
326+ ; CHECK-OOB-STRICT-NEXT: store ptr addrspace(2) [[LOAD2]], ptr addrspace(2) [[STORE_GEP2]], align 4
327+ ; CHECK-OOB-STRICT-NEXT: ret void
328+ ;
329+ %gep1 = getelementptr inbounds ptr addrspace (1 ), ptr addrspace (1 ) %ptr1 , i64 0
330+ %load1 = load ptr addrspace (1 ), ptr addrspace (1 ) %gep1 , align 4
331+ %gep2 = getelementptr inbounds ptr addrspace (2 ), ptr addrspace (1 ) %ptr1 , i64 1
332+ %load2 = load ptr addrspace (2 ), ptr addrspace (1 ) %gep2 , align 4
333+ %store.gep1 = getelementptr inbounds i32 , ptr addrspace (2 ) %ptr2 , i64 0
334+ store ptr addrspace (1 ) %load1 , ptr addrspace (2 ) %store.gep1 , align 4
335+ %store.gep2 = getelementptr inbounds ptr addrspace (2 ), ptr addrspace (2 ) %ptr2 , i64 1
336+ store ptr addrspace (2 ) %load2 , ptr addrspace (2 ) %store.gep2 , align 4
249337 ret void
250338}
0 commit comments