@@ -346,3 +346,189 @@ entry:
346346 call void @use.i32 (i32 %ext.3 )
347347 ret void
348348}
349+
; All four lanes of the zero-extended <4 x i8> are extracted in the entry block
; and summed into the noundef return value. The expected output bitcasts %src
; straight to i32 (no freeze) and recovers each lane with lshr/and; the adds
; consume those scalars while the original zext/extractelements are still
; listed in the output.
; NOTE(review): presumably the freeze can be omitted because every lane reaches
; the noundef return, so a poison lane would already be UB -- confirm against
; the transform's implementation.
350+ define noundef i32 @zext_v4i8_all_lanes_used_no_freeze (<4 x i8 > %src ) {
351+ ; CHECK-LABEL: define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(
352+ ; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
353+ ; CHECK-NEXT: [[ENTRY:.*:]]
354+ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[SRC]] to i32
355+ ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 24
356+ ; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0]], 16
357+ ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 255
358+ ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP0]], 8
359+ ; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 255
360+ ; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 255
361+ ; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
362+ ; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
363+ ; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
364+ ; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
365+ ; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
366+ ; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP6]], [[TMP5]]
367+ ; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP3]]
368+ ; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP1]]
369+ ; CHECK-NEXT: ret i32 [[ADD3]]
370+ ;
371+ entry:
372+ %ext = zext nneg <4 x i8 > %src to <4 x i32 >
373+ %ext.0 = extractelement <4 x i32 > %ext , i64 0
374+ %ext.1 = extractelement <4 x i32 > %ext , i64 1
375+ %ext.2 = extractelement <4 x i32 > %ext , i64 2
376+ %ext.3 = extractelement <4 x i32 > %ext , i64 3
377+
378+ %add1 = add i32 %ext.0 , %ext.1
379+ %add2 = add i32 %add1 , %ext.2
380+ %add3 = add i32 %add2 , %ext.3
381+ ret i32 %add3
382+ }
383+
; Only lanes 0, 1 and 3 of the zero-extended vector are extracted and summed
; into the noundef return value; lane 2 is never read. The expected output
; freezes %src before the bitcast and emits shift/mask scalars only for the
; three lanes that are used.
; NOTE(review): presumably the freeze is required because poison in the unused
; lane 2 would otherwise poison the whole bitcast i32 -- confirm against the
; transform's implementation.
384+ define noundef i32 @zext_v4i8_not_all_lanes_used (<4 x i8 > %src ) {
385+ ; CHECK-LABEL: define noundef i32 @zext_v4i8_not_all_lanes_used(
386+ ; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
387+ ; CHECK-NEXT: [[ENTRY:.*:]]
388+ ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i8> [[SRC]]
389+ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
390+ ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 24
391+ ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP0]], 8
392+ ; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 255
393+ ; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 255
394+ ; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
395+ ; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
396+ ; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
397+ ; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
398+ ; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP6]], [[TMP5]]
399+ ; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP1]]
400+ ; CHECK-NEXT: ret i32 [[ADD3]]
401+ ;
402+ entry:
403+ %ext = zext nneg <4 x i8 > %src to <4 x i32 >
404+ %ext.0 = extractelement <4 x i32 > %ext , i64 0
405+ %ext.1 = extractelement <4 x i32 > %ext , i64 1
406+ %ext.3 = extractelement <4 x i32 > %ext , i64 3
407+
408+ %add1 = add i32 %ext.0 , %ext.1
409+ %add2 = add i32 %add1 , %ext.3
410+ ret i32 %add2
411+ }
412+
; All four lanes are extracted and summed, but the i32 return value is NOT
; marked noundef. The expected output freezes %src before the bitcast and then
; recovers each lane with lshr/and.
; NOTE(review): presumably the freeze is kept because, without noundef on the
; return, returning poison is not UB and so the lanes cannot be assumed
; non-poison -- confirm against the transform's implementation.
413+ define i32 @zext_v4i8_all_lanes_used_no_ub (<4 x i8 > %src ) {
414+ ; CHECK-LABEL: define i32 @zext_v4i8_all_lanes_used_no_ub(
415+ ; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
416+ ; CHECK-NEXT: [[ENTRY:.*:]]
417+ ; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
418+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
419+ ; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
420+ ; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
421+ ; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 255
422+ ; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
423+ ; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 255
424+ ; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP1]], 255
425+ ; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
426+ ; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
427+ ; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
428+ ; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
429+ ; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
430+ ; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
431+ ; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
432+ ; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
433+ ; CHECK-NEXT: ret i32 [[ADD3]]
434+ ;
435+ entry:
436+ %ext = zext nneg <4 x i8 > %src to <4 x i32 >
437+ %ext.0 = extractelement <4 x i32 > %ext , i64 0
438+ %ext.1 = extractelement <4 x i32 > %ext , i64 1
439+ %ext.2 = extractelement <4 x i32 > %ext , i64 2
440+ %ext.3 = extractelement <4 x i32 > %ext , i64 3
441+
442+ %add1 = add i32 %ext.0 , %ext.1
443+ %add2 = add i32 %add1 , %ext.2
444+ %add3 = add i32 %add2 , %ext.3
445+ ret i32 %add3
446+ }
447+
; Lanes 0 and 1 are extracted in the entry block; lane 2 is extracted only on
; the %then path and lane 3 only on the %else path, merged by a phi in %exit.
; The expected output freezes %src, computes all four shift/mask scalars in the
; entry block, and rewrites the phi to select between the lane-2 and lane-3
; scalars.
; NOTE(review): presumably the freeze is kept because no single execution uses
; all four lanes, so the noundef return does not rule out poison in the lane
; that was skipped -- confirm against the transform's implementation.
448+ define noundef i32 @zext_v4i8_extracts_different_blocks (<4 x i8 > %src , i1 %cond ) {
449+ ; CHECK-LABEL: define noundef i32 @zext_v4i8_extracts_different_blocks(
450+ ; CHECK-SAME: <4 x i8> [[SRC:%.*]], i1 [[COND:%.*]]) {
451+ ; CHECK-NEXT: [[ENTRY:.*:]]
452+ ; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
453+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
454+ ; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
455+ ; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
456+ ; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 255
457+ ; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
458+ ; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 255
459+ ; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP1]], 255
460+ ; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
461+ ; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
462+ ; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
463+ ; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
464+ ; CHECK: [[THEN]]:
465+ ; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
466+ ; CHECK-NEXT: br label %[[EXIT:.*]]
467+ ; CHECK: [[ELSE]]:
468+ ; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
469+ ; CHECK-NEXT: br label %[[EXIT]]
470+ ; CHECK: [[EXIT]]:
471+ ; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP4]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ]
472+ ; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
473+ ; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[PHI]]
474+ ; CHECK-NEXT: ret i32 [[ADD2]]
475+ ;
476+ entry:
477+ %ext = zext nneg <4 x i8 > %src to <4 x i32 >
478+ %ext.0 = extractelement <4 x i32 > %ext , i64 0
479+ %ext.1 = extractelement <4 x i32 > %ext , i64 1
480+ br i1 %cond , label %then , label %else
481+
482+ then:
483+ %ext.2 = extractelement <4 x i32 > %ext , i64 2
484+ br label %exit
485+
486+ else:
487+ %ext.3 = extractelement <4 x i32 > %ext , i64 3
488+ br label %exit
489+
490+ exit:
491+ %phi = phi i32 [ %ext.2 , %then ], [ %ext.3 , %else ]
492+ %add1 = add i32 %ext.0 , %ext.1
493+ %add2 = add i32 %add1 , %phi
494+ ret i32 %add2
495+ }
496+
497+
; External callee for @zext_v4i8_throwing_call_between. It is declared
; willreturn but carries no nounwind attribute.
498+ declare void @may_throw () willreturn
499+
; All four lanes are extracted in the entry block and summed into the noundef
; return value, but a call to @may_throw sits between the extracts of lanes 2
; and 3. The expected output freezes %src before the bitcast and leaves the
; call in place between the two extractelements.
; NOTE(review): presumably the freeze is kept because the call may unwind
; before lane 3 is read, so reaching the noundef return is not guaranteed --
; confirm against the transform's implementation.
500+ define noundef i32 @zext_v4i8_throwing_call_between (<4 x i8 > %src ) {
501+ ; CHECK-LABEL: define noundef i32 @zext_v4i8_throwing_call_between(
502+ ; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
503+ ; CHECK-NEXT: [[ENTRY:.*:]]
504+ ; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
505+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
506+ ; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
507+ ; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
508+ ; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 255
509+ ; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
510+ ; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 255
511+ ; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP1]], 255
512+ ; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
513+ ; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
514+ ; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
515+ ; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
516+ ; CHECK-NEXT: call void @may_throw()
517+ ; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
518+ ; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
519+ ; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
520+ ; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
521+ ; CHECK-NEXT: ret i32 [[ADD3]]
522+ ;
523+ entry:
524+ %ext = zext nneg <4 x i8 > %src to <4 x i32 >
525+ %ext.0 = extractelement <4 x i32 > %ext , i64 0
526+ %ext.1 = extractelement <4 x i32 > %ext , i64 1
527+ %ext.2 = extractelement <4 x i32 > %ext , i64 2
528+ call void @may_throw ()
529+ %ext.3 = extractelement <4 x i32 > %ext , i64 3
530+ %add1 = add i32 %ext.0 , %ext.1
531+ %add2 = add i32 %add1 , %ext.2
532+ %add3 = add i32 %add2 , %ext.3
533+ ret i32 %add3
534+ }
0 commit comments