@@ -18,7 +18,7 @@ target triple = "aarch64-unknown-linux-gnu"
1818
1919; DEBUG-LABEL: LV: Checking a loop in 'trip_count_too_small'
2020; DEBUG: LV: Found a loop with a very small trip count. This loop is worth vectorizing only if no scalar iteration overheads are incurred.
21- ; DEBUG: LV: Not vectorizing: The trip count is below the minial threshold value. .
21+ ; DEBUG: LV: Not vectorizing: Runtime SCEV check is required with -Os/-Oz .
2222
2323; DEBUG-LABEL: LV: Checking a loop in 'too_many_runtime_checks'
2424; DEBUG: LV: Found trip count: 0
@@ -482,115 +482,117 @@ while.end:
482482define i32 @tc4 (ptr noundef readonly captures(none) %tmp ) vscale_range(1 ,16 ) {
483483; CHECK-LABEL: define i32 @tc4(
484484; CHECK-SAME: ptr noundef readonly captures(none) [[TMP:%.*]]) #[[ATTR1]] {
485- ; CHECK-NEXT: [[ENTRY:.*]]:
485+ ; CHECK-NEXT: [[ENTRY:.*:]]
486486; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP]], i64 16
487487; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP]], i64 32
488488; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP]], i64 48
489489; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP]], i64 64
490490; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP]], i64 80
491491; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP]], i64 96
492492; CHECK-NEXT: [[ARRAYIDX49:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP]], i64 112
493- ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
494- ; CHECK: [[FOR_COND_CLEANUP:.*]]:
495- ; CHECK-NEXT: [[ADD89_LCSSA:%.*]] = phi i32 [ [[ADD89:%.*]], %[[FOR_BODY]] ]
496- ; CHECK-NEXT: ret i32 [[ADD89_LCSSA]]
497- ; CHECK: [[FOR_BODY]]:
498- ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
499- ; CHECK-NEXT: [[SUM_0179:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD89]], %[[FOR_BODY]] ]
493+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 0, 0
500494; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[TMP]], i64 0, i64 [[TMP0]]
501- ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4
495+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 0
496+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
502497; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[ARRAYIDX2]], i64 0, i64 [[TMP0]]
503- ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
504- ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP10]], [[TMP8]]
505- ; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], [[TMP10]]
498+ ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP3]], i32 0
499+ ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
500+ ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
501+ ; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD1]]
506502; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[TMP0]]
507- ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP7]], align 4
503+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 0
504+ ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4
508505; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[TMP0]]
509- ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 4
510- ; CHECK-NEXT: [[ADD17:%.*]] = add i32 [[TMP11]], [[TMP2]]
511- ; CHECK-NEXT: [[SUB24:%.*]] = sub i32 [[TMP2]], [[TMP11]]
512- ; CHECK-NEXT: [[ADD25:%.*]] = add i32 [[ADD17]], [[ADD]]
513- ; CHECK-NEXT: [[SUB26:%.*]] = sub i32 [[ADD]], [[ADD17]]
514- ; CHECK-NEXT: [[ADD27:%.*]] = add i32 [[SUB24]], [[SUB]]
515- ; CHECK-NEXT: [[SUB28:%.*]] = sub i32 [[SUB]], [[SUB24]]
506+ ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP9]], i32 0
507+ ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
508+ ; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD2]]
509+ ; CHECK-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[WIDE_LOAD2]], [[WIDE_LOAD3]]
510+ ; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP11]], [[TMP5]]
511+ ; CHECK-NEXT: [[TMP14:%.*]] = sub <4 x i32> [[TMP5]], [[TMP11]]
512+ ; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]]
513+ ; CHECK-NEXT: [[TMP16:%.*]] = sub <4 x i32> [[TMP6]], [[TMP12]]
516514; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[ARRAYIDX30]], i64 0, i64 [[TMP0]]
517- ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP17]], align 4
515+ ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP17]], i32 0
516+ ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP18]], align 4
518517; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[ARRAYIDX33]], i64 0, i64 [[TMP0]]
519- ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP19]], align 4
520- ; CHECK-NEXT: [[ADD36:%.*]] = add i32 [[TMP5]], [[TMP4]]
521- ; CHECK-NEXT: [[SUB44:%.*]] = sub i32 [[TMP4]], [[TMP5]]
518+ ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP19]], i32 0
519+ ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP20]], align 4
520+ ; CHECK-NEXT: [[TMP21:%.*]] = add <4 x i32> [[WIDE_LOAD5]], [[WIDE_LOAD4]]
521+ ; CHECK-NEXT: [[TMP22:%.*]] = sub <4 x i32> [[WIDE_LOAD4]], [[WIDE_LOAD5]]
522522; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[ARRAYIDX46]], i64 0, i64 [[TMP0]]
523- ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP23]], align 4
523+ ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP23]], i32 0
524+ ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP24]], align 4
524525; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[ARRAYIDX49]], i64 0, i64 [[TMP0]]
525- ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP25]], align 4
526- ; CHECK-NEXT: [[ADD52:%.*]] = add i32 [[TMP12]], [[TMP6]]
527- ; CHECK-NEXT: [[SUB60:%.*]] = sub i32 [[TMP6]], [[TMP12]]
528- ; CHECK-NEXT: [[ADD61:%.*]] = add i32 [[ADD52]], [[ADD36]]
529- ; CHECK-NEXT: [[SUB62:%.*]] = sub i32 [[ADD36]], [[ADD52]]
530- ; CHECK-NEXT: [[ADD63:%.*]] = add i32 [[SUB60]], [[SUB44]]
531- ; CHECK-NEXT: [[SUB64:%.*]] = sub i32 [[SUB44]], [[SUB60]]
532- ; CHECK-NEXT: [[ADD65:%.*]] = add i32 [[ADD61]], [[ADD25]]
533- ; CHECK-NEXT: [[SHR_I173:%.*]] = lshr i32 [[ADD65]], 15
534- ; CHECK-NEXT: [[AND_I174:%.*]] = and i32 [[SHR_I173]], 65537
535- ; CHECK-NEXT: [[MUL_I175:%.*]] = mul nuw i32 [[AND_I174]], 65535
536- ; CHECK-NEXT: [[ADD_I176:%.*]] = add i32 [[MUL_I175]], [[ADD65]]
537- ; CHECK-NEXT: [[XOR_I177:%.*]] = xor i32 [[ADD_I176]], [[MUL_I175]]
538- ; CHECK-NEXT: [[SUB66:%.*]] = sub i32 [[ADD25]], [[ADD61]]
539- ; CHECK-NEXT: [[SHR_I168:%.*]] = lshr i32 [[SUB66]], 15
540- ; CHECK-NEXT: [[AND_I169:%.*]] = and i32 [[SHR_I168]], 65537
541- ; CHECK-NEXT: [[MUL_I170:%.*]] = mul nuw i32 [[AND_I169]], 65535
542- ; CHECK-NEXT: [[ADD_I171:%.*]] = add i32 [[MUL_I170]], [[SUB66]]
543- ; CHECK-NEXT: [[XOR_I172:%.*]] = xor i32 [[ADD_I171]], [[MUL_I170]]
544- ; CHECK-NEXT: [[ADD69:%.*]] = add i32 [[ADD63]], [[ADD27]]
545- ; CHECK-NEXT: [[SHR_I163:%.*]] = lshr i32 [[ADD69]], 15
546- ; CHECK-NEXT: [[AND_I164:%.*]] = and i32 [[SHR_I163]], 65537
547- ; CHECK-NEXT: [[MUL_I165:%.*]] = mul nuw i32 [[AND_I164]], 65535
548- ; CHECK-NEXT: [[ADD_I166:%.*]] = add i32 [[MUL_I165]], [[ADD69]]
549- ; CHECK-NEXT: [[XOR_I167:%.*]] = xor i32 [[ADD_I166]], [[MUL_I165]]
550- ; CHECK-NEXT: [[SUB71:%.*]] = sub i32 [[ADD27]], [[ADD63]]
551- ; CHECK-NEXT: [[SHR_I158:%.*]] = lshr i32 [[SUB71]], 15
552- ; CHECK-NEXT: [[AND_I159:%.*]] = and i32 [[SHR_I158]], 65537
553- ; CHECK-NEXT: [[MUL_I160:%.*]] = mul nuw i32 [[AND_I159]], 65535
554- ; CHECK-NEXT: [[ADD_I161:%.*]] = add i32 [[MUL_I160]], [[SUB71]]
555- ; CHECK-NEXT: [[XOR_I162:%.*]] = xor i32 [[ADD_I161]], [[MUL_I160]]
556- ; CHECK-NEXT: [[ADD75:%.*]] = add i32 [[SUB62]], [[SUB26]]
557- ; CHECK-NEXT: [[SHR_I153:%.*]] = lshr i32 [[ADD75]], 15
558- ; CHECK-NEXT: [[AND_I154:%.*]] = and i32 [[SHR_I153]], 65537
559- ; CHECK-NEXT: [[MUL_I155:%.*]] = mul nuw i32 [[AND_I154]], 65535
560- ; CHECK-NEXT: [[ADD_I156:%.*]] = add i32 [[MUL_I155]], [[ADD75]]
561- ; CHECK-NEXT: [[XOR_I157:%.*]] = xor i32 [[ADD_I156]], [[MUL_I155]]
562- ; CHECK-NEXT: [[SUB77:%.*]] = sub i32 [[SUB26]], [[SUB62]]
563- ; CHECK-NEXT: [[SHR_I148:%.*]] = lshr i32 [[SUB77]], 15
564- ; CHECK-NEXT: [[AND_I149:%.*]] = and i32 [[SHR_I148]], 65537
565- ; CHECK-NEXT: [[MUL_I150:%.*]] = mul nuw i32 [[AND_I149]], 65535
566- ; CHECK-NEXT: [[ADD_I151:%.*]] = add i32 [[MUL_I150]], [[SUB77]]
567- ; CHECK-NEXT: [[XOR_I152:%.*]] = xor i32 [[ADD_I151]], [[MUL_I150]]
568- ; CHECK-NEXT: [[ADD81:%.*]] = add i32 [[SUB64]], [[SUB28]]
569- ; CHECK-NEXT: [[SHR_I143:%.*]] = lshr i32 [[ADD81]], 15
570- ; CHECK-NEXT: [[AND_I144:%.*]] = and i32 [[SHR_I143]], 65537
571- ; CHECK-NEXT: [[MUL_I145:%.*]] = mul nuw i32 [[AND_I144]], 65535
572- ; CHECK-NEXT: [[ADD_I146:%.*]] = add i32 [[MUL_I145]], [[ADD81]]
573- ; CHECK-NEXT: [[XOR_I147:%.*]] = xor i32 [[ADD_I146]], [[MUL_I145]]
574- ; CHECK-NEXT: [[SUB83:%.*]] = sub i32 [[SUB28]], [[SUB64]]
575- ; CHECK-NEXT: [[SHR_I:%.*]] = lshr i32 [[SUB83]], 15
576- ; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537
577- ; CHECK-NEXT: [[MUL_I:%.*]] = mul nuw i32 [[AND_I]], 65535
578- ; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I]], [[SUB83]]
579- ; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[MUL_I]]
580- ; CHECK-NEXT: [[ADD73:%.*]] = add i32 [[XOR_I147]], [[XOR_I]]
581- ; CHECK-NEXT: [[ADD68:%.*]] = add i32 [[ADD73]], [[XOR_I152]]
582- ; CHECK-NEXT: [[ADD74:%.*]] = add i32 [[ADD68]], [[XOR_I157]]
583- ; CHECK-NEXT: [[ADD79:%.*]] = add i32 [[ADD74]], [[XOR_I172]]
584- ; CHECK-NEXT: [[ADD80:%.*]] = add i32 [[ADD79]], [[XOR_I177]]
585- ; CHECK-NEXT: [[ADD85:%.*]] = add i32 [[ADD80]], [[XOR_I162]]
586- ; CHECK-NEXT: [[ADD86:%.*]] = add i32 [[ADD85]], [[XOR_I167]]
587- ; CHECK-NEXT: [[CONV87:%.*]] = and i32 [[ADD86]], 65535
588- ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD86]], 16
589- ; CHECK-NEXT: [[ADD88:%.*]] = add i32 [[SHR]], [[SUM_0179]]
590- ; CHECK-NEXT: [[ADD89]] = add i32 [[ADD88]], [[CONV87]]
591- ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[TMP0]], 1
592- ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4
593- ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
526+ ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP25]], i32 0
527+ ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP26]], align 4
528+ ; CHECK-NEXT: [[TMP27:%.*]] = add <4 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD6]]
529+ ; CHECK-NEXT: [[TMP28:%.*]] = sub <4 x i32> [[WIDE_LOAD6]], [[WIDE_LOAD7]]
530+ ; CHECK-NEXT: [[TMP29:%.*]] = add <4 x i32> [[TMP27]], [[TMP21]]
531+ ; CHECK-NEXT: [[TMP30:%.*]] = sub <4 x i32> [[TMP21]], [[TMP27]]
532+ ; CHECK-NEXT: [[TMP31:%.*]] = add <4 x i32> [[TMP28]], [[TMP22]]
533+ ; CHECK-NEXT: [[TMP32:%.*]] = sub <4 x i32> [[TMP22]], [[TMP28]]
534+ ; CHECK-NEXT: [[TMP33:%.*]] = add <4 x i32> [[TMP29]], [[TMP13]]
535+ ; CHECK-NEXT: [[TMP34:%.*]] = lshr <4 x i32> [[TMP33]], splat (i32 15)
536+ ; CHECK-NEXT: [[TMP35:%.*]] = and <4 x i32> [[TMP34]], splat (i32 65537)
537+ ; CHECK-NEXT: [[TMP36:%.*]] = mul nuw <4 x i32> [[TMP35]], splat (i32 65535)
538+ ; CHECK-NEXT: [[TMP37:%.*]] = add <4 x i32> [[TMP36]], [[TMP33]]
539+ ; CHECK-NEXT: [[TMP38:%.*]] = xor <4 x i32> [[TMP37]], [[TMP36]]
540+ ; CHECK-NEXT: [[TMP39:%.*]] = sub <4 x i32> [[TMP13]], [[TMP29]]
541+ ; CHECK-NEXT: [[TMP40:%.*]] = lshr <4 x i32> [[TMP39]], splat (i32 15)
542+ ; CHECK-NEXT: [[TMP41:%.*]] = and <4 x i32> [[TMP40]], splat (i32 65537)
543+ ; CHECK-NEXT: [[TMP42:%.*]] = mul nuw <4 x i32> [[TMP41]], splat (i32 65535)
544+ ; CHECK-NEXT: [[TMP43:%.*]] = add <4 x i32> [[TMP42]], [[TMP39]]
545+ ; CHECK-NEXT: [[TMP44:%.*]] = xor <4 x i32> [[TMP43]], [[TMP42]]
546+ ; CHECK-NEXT: [[TMP45:%.*]] = add <4 x i32> [[TMP31]], [[TMP15]]
547+ ; CHECK-NEXT: [[TMP46:%.*]] = lshr <4 x i32> [[TMP45]], splat (i32 15)
548+ ; CHECK-NEXT: [[TMP47:%.*]] = and <4 x i32> [[TMP46]], splat (i32 65537)
549+ ; CHECK-NEXT: [[TMP48:%.*]] = mul nuw <4 x i32> [[TMP47]], splat (i32 65535)
550+ ; CHECK-NEXT: [[TMP49:%.*]] = add <4 x i32> [[TMP48]], [[TMP45]]
551+ ; CHECK-NEXT: [[TMP50:%.*]] = xor <4 x i32> [[TMP49]], [[TMP48]]
552+ ; CHECK-NEXT: [[TMP51:%.*]] = sub <4 x i32> [[TMP15]], [[TMP31]]
553+ ; CHECK-NEXT: [[TMP52:%.*]] = lshr <4 x i32> [[TMP51]], splat (i32 15)
554+ ; CHECK-NEXT: [[TMP53:%.*]] = and <4 x i32> [[TMP52]], splat (i32 65537)
555+ ; CHECK-NEXT: [[TMP54:%.*]] = mul nuw <4 x i32> [[TMP53]], splat (i32 65535)
556+ ; CHECK-NEXT: [[TMP55:%.*]] = add <4 x i32> [[TMP54]], [[TMP51]]
557+ ; CHECK-NEXT: [[TMP56:%.*]] = xor <4 x i32> [[TMP55]], [[TMP54]]
558+ ; CHECK-NEXT: [[TMP57:%.*]] = add <4 x i32> [[TMP30]], [[TMP14]]
559+ ; CHECK-NEXT: [[TMP58:%.*]] = lshr <4 x i32> [[TMP57]], splat (i32 15)
560+ ; CHECK-NEXT: [[TMP59:%.*]] = and <4 x i32> [[TMP58]], splat (i32 65537)
561+ ; CHECK-NEXT: [[TMP60:%.*]] = mul nuw <4 x i32> [[TMP59]], splat (i32 65535)
562+ ; CHECK-NEXT: [[TMP61:%.*]] = add <4 x i32> [[TMP60]], [[TMP57]]
563+ ; CHECK-NEXT: [[TMP62:%.*]] = xor <4 x i32> [[TMP61]], [[TMP60]]
564+ ; CHECK-NEXT: [[TMP63:%.*]] = sub <4 x i32> [[TMP14]], [[TMP30]]
565+ ; CHECK-NEXT: [[TMP64:%.*]] = lshr <4 x i32> [[TMP63]], splat (i32 15)
566+ ; CHECK-NEXT: [[TMP65:%.*]] = and <4 x i32> [[TMP64]], splat (i32 65537)
567+ ; CHECK-NEXT: [[TMP66:%.*]] = mul nuw <4 x i32> [[TMP65]], splat (i32 65535)
568+ ; CHECK-NEXT: [[TMP67:%.*]] = add <4 x i32> [[TMP66]], [[TMP63]]
569+ ; CHECK-NEXT: [[TMP68:%.*]] = xor <4 x i32> [[TMP67]], [[TMP66]]
570+ ; CHECK-NEXT: [[TMP69:%.*]] = add <4 x i32> [[TMP32]], [[TMP16]]
571+ ; CHECK-NEXT: [[TMP70:%.*]] = lshr <4 x i32> [[TMP69]], splat (i32 15)
572+ ; CHECK-NEXT: [[TMP71:%.*]] = and <4 x i32> [[TMP70]], splat (i32 65537)
573+ ; CHECK-NEXT: [[TMP72:%.*]] = mul nuw <4 x i32> [[TMP71]], splat (i32 65535)
574+ ; CHECK-NEXT: [[TMP73:%.*]] = add <4 x i32> [[TMP72]], [[TMP69]]
575+ ; CHECK-NEXT: [[TMP74:%.*]] = xor <4 x i32> [[TMP73]], [[TMP72]]
576+ ; CHECK-NEXT: [[TMP75:%.*]] = sub <4 x i32> [[TMP16]], [[TMP32]]
577+ ; CHECK-NEXT: [[TMP76:%.*]] = lshr <4 x i32> [[TMP75]], splat (i32 15)
578+ ; CHECK-NEXT: [[TMP77:%.*]] = and <4 x i32> [[TMP76]], splat (i32 65537)
579+ ; CHECK-NEXT: [[TMP78:%.*]] = mul nuw <4 x i32> [[TMP77]], splat (i32 65535)
580+ ; CHECK-NEXT: [[TMP79:%.*]] = add <4 x i32> [[TMP78]], [[TMP75]]
581+ ; CHECK-NEXT: [[TMP80:%.*]] = xor <4 x i32> [[TMP79]], [[TMP78]]
582+ ; CHECK-NEXT: [[TMP81:%.*]] = add <4 x i32> [[TMP74]], [[TMP80]]
583+ ; CHECK-NEXT: [[TMP82:%.*]] = add <4 x i32> [[TMP81]], [[TMP68]]
584+ ; CHECK-NEXT: [[TMP83:%.*]] = add <4 x i32> [[TMP82]], [[TMP62]]
585+ ; CHECK-NEXT: [[TMP84:%.*]] = add <4 x i32> [[TMP83]], [[TMP44]]
586+ ; CHECK-NEXT: [[TMP85:%.*]] = add <4 x i32> [[TMP84]], [[TMP38]]
587+ ; CHECK-NEXT: [[TMP86:%.*]] = add <4 x i32> [[TMP85]], [[TMP56]]
588+ ; CHECK-NEXT: [[TMP87:%.*]] = add <4 x i32> [[TMP86]], [[TMP50]]
589+ ; CHECK-NEXT: [[TMP88:%.*]] = and <4 x i32> [[TMP87]], splat (i32 65535)
590+ ; CHECK-NEXT: [[TMP89:%.*]] = lshr <4 x i32> [[TMP87]], splat (i32 16)
591+ ; CHECK-NEXT: [[TMP90:%.*]] = add <4 x i32> [[TMP89]], zeroinitializer
592+ ; CHECK-NEXT: [[TMP91:%.*]] = add <4 x i32> [[TMP90]], [[TMP88]]
593+ ; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw i64 0, 4
594+ ; CHECK-NEXT: [[TMP92:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP91]])
595+ ; CHECK-NEXT: ret i32 [[TMP92]]
594596;
595597entry:
596598 %arrayidx2 = getelementptr inbounds nuw i8 , ptr %tmp , i64 16
0 commit comments