@@ -329,24 +329,96 @@ for.end:
329329define void @multi_exit (ptr %dst , ptr %src.1 , ptr %src.2 , i64 %A , i64 %B ) #0 {
330330; CHECK-LABEL: @multi_exit(
331331; CHECK-NEXT: entry:
332+ ; CHECK-NEXT: [[UMAX9:%.*]] = call i64 @llvm.umax.i64(i64 [[B:%.*]], i64 1)
333+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX9]], -1
334+ ; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
335+ ; CHECK-NEXT: [[UMIN10:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[A:%.*]])
336+ ; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[UMIN10]], 1
337+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 24
338+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
339+ ; CHECK: vector.scevcheck:
340+ ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[B]], i64 1)
341+ ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[UMAX]], -1
342+ ; CHECK-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]]
343+ ; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP4]], i64 [[A]])
344+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[UMIN]] to i32
345+ ; CHECK-NEXT: [[TMP6:%.*]] = add i32 1, [[TMP5]]
346+ ; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 1
347+ ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMIN]], 4294967295
348+ ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
349+ ; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[UMIN]] to i32
350+ ; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[TMP10]], 0
351+ ; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[UMIN]], 4294967295
352+ ; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]]
353+ ; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP9]], [[TMP13]]
354+ ; CHECK-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
355+ ; CHECK: vector.memcheck:
356+ ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 1
357+ ; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC_2:%.*]], i64 8
358+ ; CHECK-NEXT: [[UMAX3:%.*]] = call i64 @llvm.umax.i64(i64 [[B]], i64 1)
359+ ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[UMAX3]], -1
360+ ; CHECK-NEXT: [[TMP16:%.*]] = freeze i64 [[TMP15]]
361+ ; CHECK-NEXT: [[UMIN4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP16]], i64 [[A]])
362+ ; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[UMIN4]], 3
363+ ; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 8
364+ ; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SRC_3:%.*]], i64 [[TMP18]]
365+ ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
366+ ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC_2]], [[SCEVGEP]]
367+ ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
368+ ; CHECK-NEXT: [[BOUND06:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP5]]
369+ ; CHECK-NEXT: [[BOUND17:%.*]] = icmp ult ptr [[SRC_3]], [[SCEVGEP]]
370+ ; CHECK-NEXT: [[FOUND_CONFLICT8:%.*]] = and i1 [[BOUND06]], [[BOUND17]]
371+ ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT8]]
372+ ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
373+ ; CHECK: vector.ph:
374+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
375+ ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
376+ ; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i64 4, i64 [[N_MOD_VF]]
377+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP20]]
378+ ; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[SRC_2]], align 8, !alias.scope [[META6:![0-9]+]]
379+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP21]], i64 0
380+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
381+ ; CHECK-NEXT: [[TMP22:%.*]] = trunc i64 [[N_VEC]] to i32
382+ ; CHECK-NEXT: [[TMP23:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
332383; CHECK-NEXT: br label [[LOOP:%.*]]
384+ ; CHECK: vector.body:
385+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
386+ ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
387+ ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[SRC_3]], i32 [[OFFSET_IDX]]
388+ ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i32 2
389+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP25]], align 8, !alias.scope [[META9:![0-9]+]]
390+ ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq <2 x i64> [[WIDE_LOAD]], zeroinitializer
391+ ; CHECK-NEXT: [[TMP27:%.*]] = and <2 x i1> [[TMP23]], [[TMP26]]
392+ ; CHECK-NEXT: [[TMP28:%.*]] = zext <2 x i1> [[TMP27]] to <2 x i8>
393+ ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i8> [[TMP28]], i32 1
394+ ; CHECK-NEXT: store i8 [[TMP29]], ptr [[DST]], align 1, !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]]
395+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
396+ ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
397+ ; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
398+ ; CHECK: middle.block:
399+ ; CHECK-NEXT: br label [[SCALAR_PH]]
400+ ; CHECK: scalar.ph:
401+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
402+ ; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
403+ ; CHECK-NEXT: br label [[LOOP1:%.*]]
333404; CHECK: loop:
334- ; CHECK-NEXT: [[IV_1_WIDE:%.*]] = phi i64 [ 0 , [[ENTRY:%.* ]] ], [ [[IV_1_NEXT_WIDE:%.*]], [[LOOP_LATCH:%.*]] ]
335- ; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ 0 , [[ENTRY ]] ], [ [[IV_1_NEXT:%.*]], [[LOOP_LATCH]] ]
336- ; CHECK-NEXT: [[EC_1:%.*]] = icmp ult i64 [[IV_1_WIDE]], [[A:%.* ]]
405+ ; CHECK-NEXT: [[IV_1_WIDE:%.*]] = phi i64 [ [[BC_RESUME_VAL]] , [[SCALAR_PH ]] ], [ [[IV_1_NEXT_WIDE:%.*]], [[LOOP_LATCH:%.*]] ]
406+ ; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL11]] , [[SCALAR_PH ]] ], [ [[IV_1_NEXT:%.*]], [[LOOP_LATCH]] ]
407+ ; CHECK-NEXT: [[EC_1:%.*]] = icmp ult i64 [[IV_1_WIDE]], [[A]]
337408; CHECK-NEXT: br i1 [[EC_1]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
338409; CHECK: loop.latch:
339- ; CHECK-NEXT: [[L_1:%.*]] = load i64, ptr [[SRC_1:%.*]], align 8
340- ; CHECK-NEXT: [[L_2:%.*]] = load i64, ptr [[SRC_2:%.*]], align 8
410+ ; CHECK-NEXT: [[SRC_1:%.*]] = getelementptr inbounds i64, ptr [[SRC_3]], i32 [[IV_1]]
411+ ; CHECK-NEXT: [[L_1:%.*]] = load i64, ptr [[SRC_1]], align 8
412+ ; CHECK-NEXT: [[L_2:%.*]] = load i64, ptr [[SRC_2]], align 8
341413; CHECK-NEXT: [[CMP55_US:%.*]] = icmp eq i64 [[L_1]], 0
342414; CHECK-NEXT: [[CMP_I_US:%.*]] = icmp ne i64 [[L_2]], 0
343415; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP_I_US]], [[CMP55_US]]
344416; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[AND]] to i8
345- ; CHECK-NEXT: store i8 [[EXT]], ptr [[DST:%.* ]], align 1
417+ ; CHECK-NEXT: store i8 [[EXT]], ptr [[DST]], align 1
346418; CHECK-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1
347419; CHECK-NEXT: [[IV_1_NEXT_WIDE]] = zext i32 [[IV_1_NEXT]] to i64
348- ; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i64 [[IV_1_NEXT_WIDE]], [[B:%.* ]]
349- ; CHECK-NEXT: br i1 [[EC_2]], label [[LOOP ]], label [[EXIT]]
420+ ; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i64 [[IV_1_NEXT_WIDE]], [[B]]
421+ ; CHECK-NEXT: br i1 [[EC_2]], label [[LOOP1 ]], label [[EXIT]], !llvm.loop [[LOOP15:![0-9]+ ]]
350422; CHECK: exit:
351423; CHECK-NEXT: ret void
352424;
@@ -360,7 +432,8 @@ loop:
360432 br i1 %ec.1 , label %loop.latch , label %exit
361433
362434loop.latch:
363- %l.1 = load i64 , ptr %src.1 , align 8
435+ %gep.src.1 = getelementptr inbounds i64 , ptr %src.1 , i32 %iv.1
436+ %l.1 = load i64 , ptr %gep.src.1 , align 8
364437 %l.2 = load i64 , ptr %src.2 , align 8
365438 %cmp55.us = icmp eq i64 %l.1 , 0
366439 %cmp.i.us = icmp ne i64 %l.2 , 0
0 commit comments