@@ -338,6 +338,79 @@ loop.exit:
338338  ret  void 
339339}
340340
341+ ; Same as @drop_vector_nuw_nsw, except built with avx1. In this case the 
342+ ; 'sub' feeding the masked load's address is cloned into one scalar copy per 
343+ ; lane; those clones must also have their nuw/nsw flags dropped. 
344+ define  void  @drop_nonvector_nuw_nsw_avx1 (ptr  noalias  nocapture  readonly  %input , ptr  %output , ptr  noalias  %ptrs ) local_unnamed_addr  #1  {
345+ ; CHECK-LABEL: define void @drop_nonvector_nuw_nsw_avx1( 
346+ ; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]], ptr noalias [[PTRS:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { 
347+ ; CHECK-NEXT:  [[ENTRY:.*:]] 
348+ ; CHECK-NEXT:    br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] 
349+ ; CHECK:       [[VECTOR_PH]]: 
350+ ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]] 
351+ ; CHECK:       [[VECTOR_BODY]]: 
352+ ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 
353+ ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] 
354+ ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0 
355+ ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1 
356+ ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2 
357+ ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3 
358+ ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer 
359+ ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 [[TMP0]] 
360+ ; CHECK-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP0]], 1 
361+ ; CHECK-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP1]], 1 
362+ ; CHECK-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP2]], 1 
363+ ; CHECK-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP3]], 1 
364+ ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP6]] 
365+ ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP7]] 
366+ ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP8]] 
367+ ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP9]] 
368+ ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP10]], i32 0 
369+ ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x ptr> [[TMP14]], ptr [[TMP11]], i32 1 
370+ ; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x ptr> [[TMP15]], ptr [[TMP12]], i32 2 
371+ ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x ptr> [[TMP16]], ptr [[TMP13]], i32 3 
372+ ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 
373+ ; CHECK-NEXT:    store <4 x ptr> [[TMP17]], ptr [[TMP18]], align 8 
374+ ; CHECK-NEXT:    [[TMP19:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) 
375+ ; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr float, ptr [[TMP10]], i32 0 
376+ ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP20]], i32 4, <4 x i1> [[TMP19]], <4 x float> poison), !invariant.load [[META0]] 
377+ ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] 
378+ ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[TMP0]] 
379+ ; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 0 
380+ ; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP22]], align 4 
381+ ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 
382+ ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) 
383+ ; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 
384+ ; CHECK:       [[MIDDLE_BLOCK]]: 
385+ ; 
386+ entry:
387+   br  label  %loop.header 
388+ 
389+ loop.header:
390+   %iv  = phi  i64  [ 0 , %entry  ], [ %iv.inc , %if.end  ]  ; iv takes the values 0..3 (exit when iv.inc == 4)
391+   %i23  = icmp  eq  i64  %iv , 0   ; true only on the first iteration
392+   %gep  = getelementptr  inbounds  ptr , ptr  %ptrs , i64  %iv 
393+   %i27  = sub  nuw  nsw  i64  %iv , 1   ; iv - 1 wraps when iv == 0, so nuw/nsw make %i27 poison on that iteration
394+   %i29  = getelementptr  inbounds  float , ptr  %input , i64  %i27   ; address built from %i27; used by the store below and by the guarded load
395+   store  ptr  %i29 , ptr  %gep   ; stores the pointer value itself on every iteration (no dereference of %i29)
396+   br  i1  %i23 , label  %if.end , label  %if.then   ; skip the load when iv == 0
397+ 
398+ if.then:
399+   %i30  = load  float , ptr  %i29 , align  4 , !invariant.load  !0   ; only reached when iv != 0; becomes the masked load in the CHECK lines
400+   br  label  %if.end 
401+ 
402+ if.end:
403+   %i34  = phi  float  [ 0 .000000e+00 , %loop.header  ], [ %i30 , %if.then  ]  ; zero when the load was skipped. NOTE(review): literal shows as '0 .000000e+00' here, likely a rendering artifact of 0.000000e+00 - confirm against the actual file
404+   %i35  = getelementptr  inbounds  float , ptr  %output , i64  %iv 
405+   store  float  %i34 , ptr  %i35 , align  4 
406+   %iv.inc  = add  nuw  nsw  i64  %iv , 1 
407+   %exitcond  = icmp  eq  i64  %iv.inc , 4   ; trip count is 4, matching the single VF=4 vector iteration expected above
408+   br  i1  %exitcond , label  %loop.exit , label  %loop.header 
409+ 
410+ loop.exit:
411+   ret  void 
412+ }
413+ 
341414; Preserve poison-generating flags from 'sub', which is not contributing to any address computation 
342415; of any masked load/store/gather/scatter. 
343416define  void  @preserve_nuw_nsw_no_addr (ptr  %output ) local_unnamed_addr  #0  {
@@ -358,7 +431,7 @@ define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 {
358431; CHECK-NEXT:    store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4 
359432; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 
360433; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) 
361- ; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15 :![0-9]+]] 
434+ ; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17 :![0-9]+]] 
362435; CHECK:       [[MIDDLE_BLOCK]]: 
363436; 
364437entry:
@@ -411,7 +484,7 @@ define void @drop_scalar_exact(ptr noalias nocapture readonly %input, ptr %outpu
411484; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP9]], align 4 
412485; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 
413486; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) 
414- ; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17 :![0-9]+]] 
487+ ; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19 :![0-9]+]] 
415488; CHECK:       [[MIDDLE_BLOCK]]: 
416489; 
417490entry:
@@ -465,7 +538,7 @@ define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 {
465538; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 
466539; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) 
467540; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 
468- ; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19 :![0-9]+]] 
541+ ; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21 :![0-9]+]] 
469542; CHECK:       [[MIDDLE_BLOCK]]: 
470543; 
471544entry:
@@ -520,7 +593,7 @@ define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input
520593; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4 
521594; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 
522595; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) 
523- ; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21 :![0-9]+]] 
596+ ; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23 :![0-9]+]] 
524597; CHECK:       [[MIDDLE_BLOCK]]: 
525598; 
526599entry:
@@ -572,7 +645,7 @@ define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 {
572645; CHECK-NEXT:    store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4 
573646; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 
574647; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) 
575- ; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23 :![0-9]+]] 
648+ ; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25 :![0-9]+]] 
576649; CHECK:       [[MIDDLE_BLOCK]]: 
577650; 
578651entry:
@@ -720,7 +793,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
720793; CHECK-NEXT:    store <4 x i8> [[PREDPHI]], ptr [[TMP16]], align 4 
721794; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 
722795; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) 
723- ; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25 :![0-9]+]] 
796+ ; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27 :![0-9]+]] 
724797; CHECK:       [[MIDDLE_BLOCK]]: 
725798; 
726799
@@ -820,7 +893,7 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst
820893; CHECK-NEXT:    store <4 x i8> [[PREDPHI]], ptr [[TMP11]], align 4 
821894; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 
822895; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) 
823- ; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27 :![0-9]+]] 
896+ ; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29 :![0-9]+]] 
824897; CHECK:       [[MIDDLE_BLOCK]]: 
825898; 
826899
@@ -879,7 +952,7 @@ define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr read
879952; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4 
880953; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 
881954; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 
882- ; CHECK-NEXT:    br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29 :![0-9]+]] 
955+ ; CHECK-NEXT:    br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31 :![0-9]+]] 
883956; CHECK:       [[MIDDLE_BLOCK]]: 
884957; 
885958
@@ -911,5 +984,6 @@ exit:
911984}
912985
913986attributes  #0  = { noinline  nounwind  uwtable  "target-features" ="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl"  }
987+ attributes  #1  = { "target-features" ="+avx"  }  ; only +avx is enabled; referenced by @drop_nonvector_nuw_nsw_avx1
914988
915989!0  = !{}
0 commit comments