@@ -380,7 +380,7 @@ for.end:
380380 ret void
381381}
382382
383- define void @loop_with_freeze_and_conditional_srem (ptr %dst , ptr %keyinfo , ptr %invariant.ptr , i32 %divisor ) # 1 {
383+ define void @loop_with_freeze_and_conditional_srem (ptr %dst , ptr %keyinfo , ptr %invariant.ptr , i32 %divisor ) {
384384; COMMON-LABEL: define void @loop_with_freeze_and_conditional_srem(
385385; COMMON-SAME: ptr [[DST:%.*]], ptr [[KEYINFO:%.*]], ptr [[INVARIANT_PTR:%.*]], i32 [[DIVISOR:%.*]]) {
386386; COMMON-NEXT: [[ENTRY:.*]]:
@@ -433,7 +433,165 @@ exit: ; preds = %loop.latch
433433 ret void
434434}
435435
; Test input: a single-block loop that, for each %iv, writes i8 0 to the three
; consecutive bytes at %dst + 3 * %iv, storing to offset 2 first, then 1, then 0.
; The function carries attribute group #1.
;
; The autogenerated checks below expect, for both prefixes, a main vector loop
; that stores <48 x i8> zeroinitializer (COST1: two such stores per iteration,
; index step 32; COST10: one store per iteration, index step 16) running up to
; index 96, followed by an epilogue vector loop that steps by 4 up to index 100
; and uses individual i8 stores.
; NOTE(review): the RUN lines that define the COST1 / COST10 prefixes are not
; visible in this hunk; presumably they differ only in a cost-related option --
; confirm against the top of the file.
436+ define void @interleave_group (ptr %dst ) #1 {
437+ ; COST1-LABEL: define void @interleave_group(
438+ ; COST1-SAME: ptr [[DST:%.*]]) #[[ATTR1:[0-9]+]] {
439+ ; COST1-NEXT: [[ITER_CHECK:.*:]]
440+ ; COST1-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
441+ ; COST1: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
442+ ; COST1-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
443+ ; COST1: [[VECTOR_PH]]:
444+ ; COST1-NEXT: br label %[[VECTOR_BODY:.*]]
445+ ; COST1: [[VECTOR_BODY]]:
446+ ; COST1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
447+ ; COST1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 16
448+ ; COST1-NEXT: [[TMP1:%.*]] = mul i64 [[INDEX]], 3
449+ ; COST1-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0]], 3
450+ ; COST1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]]
451+ ; COST1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP2]]
452+ ; COST1-NEXT: store <48 x i8> zeroinitializer, ptr [[TMP3]], align 1
453+ ; COST1-NEXT: store <48 x i8> zeroinitializer, ptr [[TMP4]], align 1
454+ ; COST1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
455+ ; COST1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
456+ ; COST1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
457+ ; COST1: [[MIDDLE_BLOCK]]:
458+ ; COST1-NEXT: br i1 false, [[EXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
459+ ; COST1: [[VEC_EPILOG_ITER_CHECK]]:
460+ ; COST1-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF4]]
461+ ; COST1: [[VEC_EPILOG_PH]]:
462+ ; COST1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
463+ ; COST1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
464+ ; COST1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
465+ ; COST1-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
466+ ; COST1-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
467+ ; COST1: [[VEC_EPILOG_VECTOR_BODY]]:
468+ ; COST1-NEXT: [[INDEX1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
469+ ; COST1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
470+ ; COST1-NEXT: [[TMP6:%.*]] = mul <4 x i64> [[VEC_IND]], splat (i64 3)
471+ ; COST1-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
472+ ; COST1-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
473+ ; COST1-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
474+ ; COST1-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
475+ ; COST1-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
476+ ; COST1-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
477+ ; COST1-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP9]]
478+ ; COST1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]]
479+ ; COST1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP11]], i64 2
480+ ; COST1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP12]], i64 2
481+ ; COST1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 2
482+ ; COST1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP14]], i64 2
483+ ; COST1-NEXT: store i8 0, ptr [[TMP15]], align 1
484+ ; COST1-NEXT: store i8 0, ptr [[TMP16]], align 1
485+ ; COST1-NEXT: store i8 0, ptr [[TMP17]], align 1
486+ ; COST1-NEXT: store i8 0, ptr [[TMP18]], align 1
487+ ; COST1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP11]], i64 1
488+ ; COST1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP12]], i64 1
489+ ; COST1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP13]], i64 1
490+ ; COST1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP14]], i64 1
491+ ; COST1-NEXT: store i8 0, ptr [[TMP19]], align 1
492+ ; COST1-NEXT: store i8 0, ptr [[TMP20]], align 1
493+ ; COST1-NEXT: store i8 0, ptr [[TMP21]], align 1
494+ ; COST1-NEXT: store i8 0, ptr [[TMP22]], align 1
495+ ; COST1-NEXT: store i8 0, ptr [[TMP11]], align 1
496+ ; COST1-NEXT: store i8 0, ptr [[TMP12]], align 1
497+ ; COST1-NEXT: store i8 0, ptr [[TMP13]], align 1
498+ ; COST1-NEXT: store i8 0, ptr [[TMP14]], align 1
499+ ; COST1-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4
500+ ; COST1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
501+ ; COST1-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 100
502+ ; COST1-NEXT: br i1 [[TMP23]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
503+ ; COST1: [[VEC_EPILOG_MIDDLE_BLOCK]]:
504+ ; COST1-NEXT: br i1 false, [[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
505+ ; COST1: [[VEC_EPILOG_SCALAR_PH]]:
506+ ;
507+ ; COST10-LABEL: define void @interleave_group(
508+ ; COST10-SAME: ptr [[DST:%.*]]) #[[ATTR1:[0-9]+]] {
509+ ; COST10-NEXT: [[ITER_CHECK:.*:]]
510+ ; COST10-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
511+ ; COST10: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
512+ ; COST10-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
513+ ; COST10: [[VECTOR_PH]]:
514+ ; COST10-NEXT: br label %[[VECTOR_BODY:.*]]
515+ ; COST10: [[VECTOR_BODY]]:
516+ ; COST10-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
517+ ; COST10-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
518+ ; COST10-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]]
519+ ; COST10-NEXT: store <48 x i8> zeroinitializer, ptr [[TMP1]], align 1
520+ ; COST10-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
521+ ; COST10-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
522+ ; COST10-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
523+ ; COST10: [[MIDDLE_BLOCK]]:
524+ ; COST10-NEXT: br i1 false, [[EXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
525+ ; COST10: [[VEC_EPILOG_ITER_CHECK]]:
526+ ; COST10-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF4]]
527+ ; COST10: [[VEC_EPILOG_PH]]:
528+ ; COST10-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
529+ ; COST10-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
530+ ; COST10-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
531+ ; COST10-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
532+ ; COST10-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
533+ ; COST10: [[VEC_EPILOG_VECTOR_BODY]]:
534+ ; COST10-NEXT: [[INDEX1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
535+ ; COST10-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
536+ ; COST10-NEXT: [[TMP3:%.*]] = mul <4 x i64> [[VEC_IND]], splat (i64 3)
537+ ; COST10-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
538+ ; COST10-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
539+ ; COST10-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
540+ ; COST10-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
541+ ; COST10-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]]
542+ ; COST10-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]]
543+ ; COST10-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]]
544+ ; COST10-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
545+ ; COST10-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP8]], i64 2
546+ ; COST10-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP9]], i64 2
547+ ; COST10-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP10]], i64 2
548+ ; COST10-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP11]], i64 2
549+ ; COST10-NEXT: store i8 0, ptr [[TMP12]], align 1
550+ ; COST10-NEXT: store i8 0, ptr [[TMP13]], align 1
551+ ; COST10-NEXT: store i8 0, ptr [[TMP14]], align 1
552+ ; COST10-NEXT: store i8 0, ptr [[TMP15]], align 1
553+ ; COST10-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP8]], i64 1
554+ ; COST10-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP9]], i64 1
555+ ; COST10-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP10]], i64 1
556+ ; COST10-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP11]], i64 1
557+ ; COST10-NEXT: store i8 0, ptr [[TMP16]], align 1
558+ ; COST10-NEXT: store i8 0, ptr [[TMP17]], align 1
559+ ; COST10-NEXT: store i8 0, ptr [[TMP18]], align 1
560+ ; COST10-NEXT: store i8 0, ptr [[TMP19]], align 1
561+ ; COST10-NEXT: store i8 0, ptr [[TMP8]], align 1
562+ ; COST10-NEXT: store i8 0, ptr [[TMP9]], align 1
563+ ; COST10-NEXT: store i8 0, ptr [[TMP10]], align 1
564+ ; COST10-NEXT: store i8 0, ptr [[TMP11]], align 1
565+ ; COST10-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4
566+ ; COST10-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
567+ ; COST10-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 100
568+ ; COST10-NEXT: br i1 [[TMP20]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
569+ ; COST10: [[VEC_EPILOG_MIDDLE_BLOCK]]:
570+ ; COST10-NEXT: br i1 false, [[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
571+ ; COST10: [[VEC_EPILOG_SCALAR_PH]]:
572+ ;
573+ entry:
574+ br label %loop
575+
576+ loop:
577+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
; Byte offset of this iteration's 3-byte group: 3 * %iv.
578+ %iv.3 = mul i64 %iv , 3
579+ %gep.0 = getelementptr i8 , ptr %dst , i64 %iv.3
; The three group members are written highest offset first (2, then 1, then 0).
580+ %gep.2 = getelementptr i8 , ptr %gep.0 , i64 2
581+ store i8 0 , ptr %gep.2 , align 1
582+ %gep.1 = getelementptr i8 , ptr %gep.0 , i64 1
583+ store i8 0 , ptr %gep.1 , align 1
584+ store i8 0 , ptr %gep.0 , align 1
585+ %iv.next = add i64 %iv , 1
; The exit test uses the pre-increment %iv, so the body runs for %iv = 0..100
; (101 iterations) before branching to %exit.
586+ %ec = icmp eq i64 %iv , 100
587+ br i1 %ec , label %exit , label %loop
588+
589+ exit:
590+ ret void
591+ }
592+
; Attribute groups referenced by the test functions:
;   #0 - enables the neon and sve target features with vscale_range(1, 16).
;   #1 - selects the neoverse-512tvb target CPU; used by @interleave_group.
436593attributes #0 = { "target-features" ="+neon,+sve" vscale_range(1 ,16 ) }
594+ attributes #1 = { "target-cpu" ="neoverse-512tvb" }
437595
438596declare void @llvm.assume (i1 noundef)
439597declare i64 @llvm.umin.i64 (i64 , i64 )
0 commit comments