@@ -449,4 +449,107 @@ exit:
449449  ret  void 
450450}
451451
452+ ; Make sure we don't consider first order recurrence phis as profitable to scalarize. 
453+ ; Test case for https://github.com/llvm/llvm-project/issues/139060 and 
454+ ; https://github.com/llvm/llvm-project/issues/139065. 
; Input: a single-block loop that stores `10 - %for` to %dst[%iv], where %for
; is a first-order recurrence on the induction variable %iv.
;   %dst - base pointer of the i32 array being written.
;   %c   - selects the trip count: 8 when true, 9 when false.
;   %x   - not referenced anywhere in the function body.
; Expected output (CHECK lines below): the recurrence stays a <4 x i32> vector
; phi (VECTOR_RECUR) spliced with VEC_IND via shufflevector; each predicated
; store block reads its lane with extractelement.
; NOTE(review): the CHECK lines look tool-generated (TMPn / PRED_STORE_* names);
; presumably they should be regenerated rather than hand-edited - confirm
; against the file header, which is not visible here.
455+ define  void  @test_first_order_recurrence_tried_to_scalarized (ptr  %dst , i1  %c , i32  %x ) {
456+ ; CHECK-LABEL: @test_first_order_recurrence_tried_to_scalarized( 
457+ ; CHECK-NEXT:  entry: 
458+ ; CHECK-NEXT:    [[N:%.*]] = select i1 [[C:%.*]], i32 8, i32 9 
459+ ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 
460+ ; CHECK:       vector.ph: 
461+ ; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i32 [[N]], 3 
462+ ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4 
463+ ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] 
464+ ; CHECK-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[N]], 1 
465+ ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 
466+ ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 
467+ ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]] 
468+ ; CHECK:       vector.body: 
469+ ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] 
470+ ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] 
471+ ; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND]], [[PRED_STORE_CONTINUE6]] ] 
472+ ; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> 
473+ ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 
474+ ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 
475+ ; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 
476+ ; CHECK:       pred.store.if: 
477+ ; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0 
478+ ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i32, ptr [[DST:%.*]], i32 [[TMP3]] 
479+ ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0 
480+ ; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw i32 10, [[TMP5]] 
481+ ; CHECK-NEXT:    store i32 [[TMP6]], ptr [[TMP4]], align 4 
482+ ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]] 
483+ ; CHECK:       pred.store.continue: 
484+ ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 
485+ ; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] 
486+ ; CHECK:       pred.store.if1: 
487+ ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[INDEX]], 1 
488+ ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i32 [[TMP8]] 
489+ ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1 
490+ ; CHECK-NEXT:    [[TMP11:%.*]] = sub nsw i32 10, [[TMP10]] 
491+ ; CHECK-NEXT:    store i32 [[TMP11]], ptr [[TMP9]], align 4 
492+ ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]] 
493+ ; CHECK:       pred.store.continue2: 
494+ ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 
495+ ; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] 
496+ ; CHECK:       pred.store.if3: 
497+ ; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[INDEX]], 2 
498+ ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i32 [[TMP13]] 
499+ ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2 
500+ ; CHECK-NEXT:    [[TMP16:%.*]] = sub nsw i32 10, [[TMP15]] 
501+ ; CHECK-NEXT:    store i32 [[TMP16]], ptr [[TMP14]], align 4 
502+ ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]] 
503+ ; CHECK:       pred.store.continue4: 
504+ ; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 
505+ ; CHECK-NEXT:    br i1 [[TMP17]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] 
506+ ; CHECK:       pred.store.if5: 
507+ ; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[INDEX]], 3 
508+ ; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i32 [[TMP18]] 
509+ ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 
510+ ; CHECK-NEXT:    [[TMP21:%.*]] = sub nsw i32 10, [[TMP20]] 
511+ ; CHECK-NEXT:    store i32 [[TMP21]], ptr [[TMP19]], align 4 
512+ ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]] 
513+ ; CHECK:       pred.store.continue6: 
514+ ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 
515+ ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) 
516+ ; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 
517+ ; CHECK-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 
518+ ; CHECK:       middle.block: 
519+ ; CHECK-NEXT:    br label [[EXIT:%.*]] 
520+ ; CHECK:       scalar.ph: 
521+ ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] 
522+ ; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 4, [[ENTRY]] ] 
523+ ; CHECK-NEXT:    br label [[LOOP:%.*]] 
524+ ; CHECK:       loop: 
525+ ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] 
526+ ; CHECK-NEXT:    [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV]], [[LOOP]] ] 
527+ ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 
528+ ; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 10, [[FOR]] 
529+ ; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i32 [[IV]] 
530+ ; CHECK-NEXT:    store i32 [[SUB]], ptr [[GEP_DST]], align 4 
531+ ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] 
532+ ; CHECK-NEXT:    br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] 
533+ ; CHECK:       exit: 
534+ ; CHECK-NEXT:    ret void 
535+ ; 
536+ entry:
  ; Trip count: the loop below runs %N times, i.e. 8 or 9 depending on %c.
537+   %N  = select  i1  %c , i32  8 , i32  9 
538+   br  label  %loop 
539+ 
540+ loop:
541+   %iv  = phi  i32  [ 0 , %entry  ], [ %iv.next , %loop  ]
  ; First-order recurrence: %for is 4 on the first iteration and the previous
  ; iteration's %iv afterwards.
542+   %for  = phi  i32  [ 4 , %entry  ], [ %iv , %loop  ]
543+   %iv.next  = add  nuw  nsw  i32  %iv , 1 
  ; The only user of the recurrence; its result is stored to %dst[%iv].
544+   %sub  = sub  nsw  i32  10 , %for 
545+   %gep.dst  = getelementptr  inbounds  nuw  i32 , ptr  %dst , i32  %iv 
546+   store  i32  %sub , ptr  %gep.dst , align  4 
  ; Leave the loop once the incremented induction variable reaches %N.
547+   %ec  = icmp  eq  i32  %iv.next , %N 
548+   br  i1  %ec , label  %exit , label  %loop 
549+ 
550+ exit:
551+   ret  void 
552+ }
553+ 
554+ 
; Attribute group #0: sets "target-cpu" to "znver3" for functions that reference it.
; NOTE(review): @test_first_order_recurrence_tried_to_scalarized carries no
; attribute-group reference in its signature, so it does not use #0 - confirm
; this is intentional.
452555attributes  #0  = { "target-cpu" ="znver3"  }
0 commit comments