@@ -226,11 +226,10 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C
226
226
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 36
227
227
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
228
228
; CHECK: middle.block:
229
- ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
230
229
; CHECK-NEXT: br label [[SCALAR_PH]]
231
230
; CHECK: scalar.ph:
232
231
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 73, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
233
- ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[REC_START]], [[ENTRY]] ]
232
+ ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[REC_START]], [[ENTRY]] ]
234
233
; CHECK-NEXT: br label [[LOOP:%.*]]
235
234
; CHECK: loop:
236
235
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -364,28 +363,28 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr
364
363
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1
365
364
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 2
366
365
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 3
367
- ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 4
368
- ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 5
369
- ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 6
370
- ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 7
371
- ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw [3 x float], ptr [[A:%.*]], i64 [[TMP5]]
372
- ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP6]]
373
- ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP7]]
374
- ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP8]]
375
- ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP13]], i32 0
376
- ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x ptr> [[TMP17]], ptr [[TMP14]], i32 1
377
- ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x ptr> [[TMP18]], ptr [[TMP15]], i32 2
378
- ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x ptr> [[TMP19]], ptr [[TMP16]], i32 3
379
- ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP9]]
380
- ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP10]]
381
- ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP11]]
382
- ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP12]]
366
+ ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 4
367
+ ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 5
368
+ ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 6
369
+ ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 7
370
+ ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr nusw [3 x float], ptr [[A:%.*]], i64 [[TMP5]]
371
+ ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP6]]
372
+ ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP7]]
373
+ ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP8]]
383
374
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP21]], i32 0
384
375
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x ptr> [[TMP25]], ptr [[TMP22]], i32 1
385
376
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x ptr> [[TMP26]], ptr [[TMP23]], i32 2
386
377
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x ptr> [[TMP27]], ptr [[TMP24]], i32 3
387
- ; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x ptr> [[TMP20]], <4 x ptr> [[TMP28]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
388
- ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x float>, ptr [[TMP21]], align 4
378
+ ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP11]]
379
+ ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP12]]
380
+ ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP9]]
381
+ ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP10]]
382
+ ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP19]], i32 0
383
+ ; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x ptr> [[TMP33]], ptr [[TMP20]], i32 1
384
+ ; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x ptr> [[TMP34]], ptr [[TMP31]], i32 2
385
+ ; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x ptr> [[TMP35]], ptr [[TMP32]], i32 3
386
+ ; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x ptr> [[TMP28]], <4 x ptr> [[TMP38]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
387
+ ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x float>, ptr [[TMP19]], align 4
389
388
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
390
389
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x float> [[STRIDED_VEC]], i32 3
391
390
; CHECK-NEXT: store float [[TMP30]], ptr [[C:%.*]], align 4
@@ -399,7 +398,7 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr
399
398
; CHECK-NEXT: br label [[SCALAR_PH]]
400
399
; CHECK: scalar.ph:
401
400
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
402
- ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi ptr [ [[TMP24]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
401
+ ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi ptr [ [[TMP32]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
403
402
; CHECK-NEXT: br label [[LOOP:%.*]]
404
403
; CHECK: loop:
405
404
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
0 commit comments