@@ -119,7 +119,6 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr
119119; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[ARG1]])
120120; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
121121; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
122- ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
123122; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
124123; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
125124; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
@@ -273,85 +272,75 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
273272; CHECK-NEXT: [[ENTRY:.*]]:
274273; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[N]], 3
275274; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
276- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 64
275+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 56
277276; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
278277; CHECK: [[VECTOR_SCEVCHECK]]:
279278; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[N]], 3
280279; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
281280; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
282281; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
283- ; CHECK-NEXT: [[TMP3:%.*]] = sub i64 0, [[MUL_RESULT]]
284282; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT]]
285283; CHECK-NEXT: [[TMP41:%.*]] = icmp ult ptr [[TMP32]], [[A]]
286284; CHECK-NEXT: [[TMP44:%.*]] = or i1 [[TMP41]], [[MUL_OVERFLOW]]
287285; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 4
288286; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
289287; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
290288; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
291- ; CHECK-NEXT: [[TMP55:%.*]] = sub i64 0, [[MUL_RESULT2]]
292289; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]]
293290; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr [[TMP4]], [[SCEVGEP]]
294291; CHECK-NEXT: [[TMP57:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW3]]
295292; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 8
296293; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
297294; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
298295; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
299- ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 0, [[MUL_RESULT3]]
300296; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
301297; CHECK-NEXT: [[TMP9:%.*]] = icmp ult ptr [[TMP8]], [[SCEVGEP1]]
302298; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW4]]
303299; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 12
304300; CHECK-NEXT: [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
305301; CHECK-NEXT: [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0
306302; CHECK-NEXT: [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1
307- ; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT7]]
308303; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]]
309304; CHECK-NEXT: [[TMP13:%.*]] = icmp ult ptr [[TMP12]], [[SCEVGEP5]]
310305; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW8]]
311306; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i8, ptr [[A]], i64 16
312307; CHECK-NEXT: [[MUL10:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
313308; CHECK-NEXT: [[MUL_RESULT11:%.*]] = extractvalue { i64, i1 } [[MUL10]], 0
314309; CHECK-NEXT: [[MUL_OVERFLOW12:%.*]] = extractvalue { i64, i1 } [[MUL10]], 1
315- ; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT11]]
316310; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[SCEVGEP9]], i64 [[MUL_RESULT11]]
317311; CHECK-NEXT: [[TMP17:%.*]] = icmp ult ptr [[TMP16]], [[SCEVGEP9]]
318312; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW12]]
319313; CHECK-NEXT: [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[A]], i64 20
320314; CHECK-NEXT: [[MUL14:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
321315; CHECK-NEXT: [[MUL_RESULT15:%.*]] = extractvalue { i64, i1 } [[MUL14]], 0
322316; CHECK-NEXT: [[MUL_OVERFLOW16:%.*]] = extractvalue { i64, i1 } [[MUL14]], 1
323- ; CHECK-NEXT: [[TMP19:%.*]] = sub i64 0, [[MUL_RESULT15]]
324317; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SCEVGEP13]], i64 [[MUL_RESULT15]]
325318; CHECK-NEXT: [[TMP21:%.*]] = icmp ult ptr [[TMP20]], [[SCEVGEP13]]
326319; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW16]]
327320; CHECK-NEXT: [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[A]], i64 24
328321; CHECK-NEXT: [[MUL18:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
329322; CHECK-NEXT: [[MUL_RESULT19:%.*]] = extractvalue { i64, i1 } [[MUL18]], 0
330323; CHECK-NEXT: [[MUL_OVERFLOW20:%.*]] = extractvalue { i64, i1 } [[MUL18]], 1
331- ; CHECK-NEXT: [[TMP23:%.*]] = sub i64 0, [[MUL_RESULT19]]
332324; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[SCEVGEP17]], i64 [[MUL_RESULT19]]
333325; CHECK-NEXT: [[TMP25:%.*]] = icmp ult ptr [[TMP24]], [[SCEVGEP17]]
334326; CHECK-NEXT: [[TMP26:%.*]] = or i1 [[TMP25]], [[MUL_OVERFLOW20]]
335327; CHECK-NEXT: [[SCEVGEP21:%.*]] = getelementptr i8, ptr [[A]], i64 28
336328; CHECK-NEXT: [[MUL22:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
337329; CHECK-NEXT: [[MUL_RESULT23:%.*]] = extractvalue { i64, i1 } [[MUL22]], 0
338330; CHECK-NEXT: [[MUL_OVERFLOW24:%.*]] = extractvalue { i64, i1 } [[MUL22]], 1
339- ; CHECK-NEXT: [[TMP27:%.*]] = sub i64 0, [[MUL_RESULT23]]
340331; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SCEVGEP21]], i64 [[MUL_RESULT23]]
341332; CHECK-NEXT: [[TMP29:%.*]] = icmp ult ptr [[TMP28]], [[SCEVGEP21]]
342333; CHECK-NEXT: [[TMP30:%.*]] = or i1 [[TMP29]], [[MUL_OVERFLOW24]]
343334; CHECK-NEXT: [[SCEVGEP31:%.*]] = getelementptr i8, ptr [[B]], i64 4
344335; CHECK-NEXT: [[MUL29:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP2]])
345336; CHECK-NEXT: [[MUL_RESULT30:%.*]] = extractvalue { i64, i1 } [[MUL29]], 0
346337; CHECK-NEXT: [[MUL_OVERFLOW31:%.*]] = extractvalue { i64, i1 } [[MUL29]], 1
347- ; CHECK-NEXT: [[TMP67:%.*]] = sub i64 0, [[MUL_RESULT30]]
348338; CHECK-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[SCEVGEP31]], i64 [[MUL_RESULT30]]
349339; CHECK-NEXT: [[TMP69:%.*]] = icmp ult ptr [[TMP68]], [[SCEVGEP31]]
350340; CHECK-NEXT: [[TMP70:%.*]] = or i1 [[TMP69]], [[MUL_OVERFLOW31]]
351341; CHECK-NEXT: [[MUL25:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
352342; CHECK-NEXT: [[MUL_RESULT26:%.*]] = extractvalue { i64, i1 } [[MUL25]], 0
353343; CHECK-NEXT: [[MUL_OVERFLOW27:%.*]] = extractvalue { i64, i1 } [[MUL25]], 1
354- ; CHECK-NEXT: [[TMP31:%.*]] = sub i64 0, [[MUL_RESULT26]]
355344; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[B]], i64 [[MUL_RESULT26]]
356345; CHECK-NEXT: [[TMP33:%.*]] = icmp ult ptr [[TMP71]], [[B]]
357346; CHECK-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW27]]
0 commit comments