@@ -137,8 +137,8 @@ exit: ; preds = %exit.loopexit, %entry
137137 ret i32 %res.0.lcssa
138138}
139139
140- define i32 @common_sext_different_types (ptr %a , ptr %b , ptr %c , i32 %N ) {
141- ; CHECK-LABEL: define i32 @common_sext_different_types (
140+ define i32 @common_sext_different_src_types (ptr %a , ptr %b , ptr %c , i32 %N ) {
141+ ; CHECK-LABEL: define i32 @common_sext_different_src_types (
142142; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) {
143143; CHECK-NEXT: [[ENTRY:.*:]]
144144; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2
@@ -222,8 +222,9 @@ exit: ; preds = %exit.loopexit, %entry
222222 %res.0.lcssa = phi i32 [ %add3 , %for.body ]
223223 ret i32 %res.0.lcssa
224224}
225- define i32 @common_zext_different_types (ptr %a , ptr %b , ptr %c , i32 %N ) {
226- ; CHECK-LABEL: define i32 @common_zext_different_types(
225+
226+ define i32 @common_zext_different_src_types (ptr %a , ptr %b , ptr %c , i32 %N ) {
227+ ; CHECK-LABEL: define i32 @common_zext_different_src_types(
227228; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) {
228229; CHECK-NEXT: [[ENTRY:.*:]]
229230; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2
@@ -307,3 +308,153 @@ exit: ; preds = %exit.loopexit, %entry
307308 %res.0.lcssa = phi i32 [ %add3 , %for.body ]
308309 ret i32 %res.0.lcssa
309310}
311+
; Scalar loop over the i8 elements of %a, %b and %c. Each iteration
; sign-extends %a.val and %b.val to i32, and %b.val and %c.val to i16, so the
; single load %b.val feeds two sext instructions with different destination
; types. The i32 sum and the sign-extended i16 sum are added into %add3; the
; value from the final iteration is returned through the phi in %exit.
; The loop exits when %iv.next equals %N >> 1 (zero-extended to i64).
; The expected output below uses 4-wide vectors and keeps the two extends of
; the %b load separate (one to <4 x i32>, one to <4 x i16>).
; NOTE(review): %cmp28.not, %res and %b.ptr2 have no users in this function,
; and %entry branches to the loop unconditionally. Presumably leftovers from
; reducing a larger test; the expected output still matches the %cmp28.not
; compare, so regenerate the assertions if any of them is removed.
312+ define i32 @common_sext_different_dest_types (ptr %a , ptr %b , ptr %c , i32 %N ) {
313+ ; CHECK-LABEL: define i32 @common_sext_different_dest_types(
314+ ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) {
315+ ; CHECK-NEXT: [[ENTRY:.*:]]
316+ ; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2
317+ ; CHECK-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
318+ ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
319+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
320+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
321+ ; CHECK: [[VECTOR_PH]]:
322+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
323+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
324+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
325+ ; CHECK: [[VECTOR_BODY]]:
326+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
327+ ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]]
328+ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDEX]]
329+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDEX]]
330+ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i32 0
331+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
332+ ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i32 0
333+ ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
334+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i32 0
335+ ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
336+ ; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i8> [[WIDE_LOAD]] to <4 x i32>
337+ ; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i8> [[WIDE_LOAD1]] to <4 x i32>
338+ ; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[WIDE_LOAD1]] to <4 x i16>
339+ ; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i8> [[WIDE_LOAD2]] to <4 x i16>
340+ ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP7]]
341+ ; CHECK-NEXT: [[TMP11:%.*]] = add nsw <4 x i16> [[TMP9]], [[TMP8]]
342+ ; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32>
343+ ; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP10]], [[TMP12]]
344+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
345+ ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
346+ ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
347+ ; CHECK: [[MIDDLE_BLOCK]]:
348+ ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i32 3
349+ ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP13]], i32 3
350+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
351+ ; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
352+ ; CHECK: [[SCALAR_PH]]:
353+ ;
354+ entry:
355+ %cmp28.not = icmp ult i32 %N , 2
356+ %div27 = lshr i32 %N , 1
357+ %wide.trip.count = zext nneg i32 %div27 to i64
358+ br label %for.body
359+
360+ for.body: ; preds = %entry, %for.body
361+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %for.body ]
362+ %res = phi i32 [ 0 , %entry ], [ %add3 , %for.body ] ; previous iteration's %add3; no users
363+ %a.ptr = getelementptr inbounds nuw i8 , ptr %a , i64 %iv
364+ %b.ptr = getelementptr inbounds nuw i8 , ptr %b , i64 %iv
365+ %b.ptr2 = getelementptr inbounds nuw i16 , ptr %b , i64 %iv ; no users; both %b extends read %b.val
366+ %c.ptr = getelementptr inbounds nuw i8 , ptr %c , i64 %iv
367+ %a.val = load i8 , ptr %a.ptr , align 1
368+ %b.val = load i8 , ptr %b.ptr , align 1
369+ %c.val = load i8 , ptr %c.ptr , align 1
370+ %a.ext = sext i8 %a.val to i32
371+ %b.ext = sext i8 %b.val to i32 ; %b.val widened to i32 ...
372+ %b.ext2 = sext i8 %b.val to i16 ; ... and the same %b.val widened to i16
373+ %c.ext = sext i8 %c.val to i16
374+ %add = add nsw i32 %a.ext , %b.ext
375+ %add2 = add nsw i16 %c.ext , %b.ext2
376+ %add2.ext = sext i16 %add2 to i32 ; bring the i16 sum up to i32 for the final add
377+ %add3 = add i32 %add , %add2.ext
378+ %iv.next = add nuw nsw i64 %iv , 1
379+ %exitcond = icmp eq i64 %iv.next , %wide.trip.count
380+ br i1 %exitcond , label %exit , label %for.body
381+
382+ exit: ; preds = %for.body
383+ %res.0.lcssa = phi i32 [ %add3 , %for.body ]
384+ ret i32 %res.0.lcssa
385+ }
386+
; Scalar loop over the i8 elements of %a, %b and %c. Each iteration
; zero-extends %a.val and %b.val to i32, and %b.val and %c.val to i16, so the
; single load %b.val feeds two zext instructions with different destination
; types. The i32 sum and the zero-extended i16 sum are added into %add3; the
; value from the final iteration is returned through the phi in %exit.
; The loop exits when %iv.next equals %N >> 1 (zero-extended to i64).
; The expected output below uses 4-wide vectors and keeps the two extends of
; the %b load separate (one to <4 x i32>, one to <4 x i16>).
; NOTE(review): %cmp28.not, %res and %b.ptr2 have no users in this function,
; and %entry branches to the loop unconditionally. Presumably leftovers from
; reducing a larger test; the expected output still matches the %cmp28.not
; compare, so regenerate the assertions if any of them is removed.
387+ define i32 @common_zext_different_dest_types (ptr %a , ptr %b , ptr %c , i32 %N ) {
388+ ; CHECK-LABEL: define i32 @common_zext_different_dest_types(
389+ ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) {
390+ ; CHECK-NEXT: [[ENTRY:.*:]]
391+ ; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2
392+ ; CHECK-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
393+ ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
394+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
395+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
396+ ; CHECK: [[VECTOR_PH]]:
397+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
398+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
399+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
400+ ; CHECK: [[VECTOR_BODY]]:
401+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
402+ ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]]
403+ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDEX]]
404+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDEX]]
405+ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i32 0
406+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
407+ ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i32 0
408+ ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
409+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i32 0
410+ ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
411+ ; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32>
412+ ; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i32>
413+ ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i16>
414+ ; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i16>
415+ ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP7]]
416+ ; CHECK-NEXT: [[TMP11:%.*]] = add nsw <4 x i16> [[TMP9]], [[TMP8]]
417+ ; CHECK-NEXT: [[TMP12:%.*]] = zext <4 x i16> [[TMP11]] to <4 x i32>
418+ ; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP10]], [[TMP12]]
419+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
420+ ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
421+ ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
422+ ; CHECK: [[MIDDLE_BLOCK]]:
423+ ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i32 3
424+ ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP13]], i32 3
425+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
426+ ; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
427+ ; CHECK: [[SCALAR_PH]]:
428+ ;
429+ entry:
430+ %cmp28.not = icmp ult i32 %N , 2
431+ %div27 = lshr i32 %N , 1
432+ %wide.trip.count = zext nneg i32 %div27 to i64
433+ br label %for.body
434+
435+ for.body: ; preds = %entry, %for.body
436+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %for.body ]
437+ %res = phi i32 [ 0 , %entry ], [ %add3 , %for.body ] ; previous iteration's %add3; no users
438+ %a.ptr = getelementptr inbounds nuw i8 , ptr %a , i64 %iv
439+ %b.ptr = getelementptr inbounds nuw i8 , ptr %b , i64 %iv
440+ %b.ptr2 = getelementptr inbounds nuw i16 , ptr %b , i64 %iv ; no users; both %b extends read %b.val
441+ %c.ptr = getelementptr inbounds nuw i8 , ptr %c , i64 %iv
442+ %a.val = load i8 , ptr %a.ptr , align 1
443+ %b.val = load i8 , ptr %b.ptr , align 1
444+ %c.val = load i8 , ptr %c.ptr , align 1
445+ %a.ext = zext i8 %a.val to i32
446+ %b.ext = zext i8 %b.val to i32 ; %b.val widened to i32 ...
447+ %b.ext2 = zext i8 %b.val to i16 ; ... and the same %b.val widened to i16
448+ %c.ext = zext i8 %c.val to i16
449+ %add = add nsw i32 %a.ext , %b.ext
450+ %add2 = add nsw i16 %c.ext , %b.ext2
451+ %add2.ext = zext i16 %add2 to i32 ; bring the i16 sum up to i32 for the final add
452+ %add3 = add i32 %add , %add2.ext
453+ %iv.next = add nuw nsw i64 %iv , 1
454+ %exitcond = icmp eq i64 %iv.next , %wide.trip.count
455+ br i1 %exitcond , label %exit , label %for.body
456+
457+ exit: ; preds = %for.body
458+ %res.0.lcssa = phi i32 [ %add3 , %for.body ]
459+ ret i32 %res.0.lcssa
460+ }
0 commit comments