@@ -91,7 +91,7 @@ for.body: ; preds = %for.body.preheader,
9191 br i1 %exitcond.not , label %for.cond.cleanup , label %for.body
9292}
9393
94- define void @thirdorderrec (ptr nocapture noundef readonly %x , ptr noalias nocapture noundef writeonly %y , i32 noundef %n ) # 0 {
94+ define void @thirdorderrec (ptr nocapture noundef readonly %x , ptr noalias nocapture noundef writeonly %y , i32 noundef %n ) {
9595; CHECK-LABEL: @thirdorderrec(
9696; CHECK-NEXT: entry:
9797; CHECK-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 3
@@ -352,3 +352,101 @@ loop:
352352exit:
353353 ret void
354354}
355+
; Loop with a pointer-typed first-order recurrence: %prev carries the address
; that was computed as %next in the previous iteration (initially %A).
; Each iteration loads a float through %next and stores it to %C, then loads a
; float through %prev and stores it to %B; both store addresses are loop-invariant.
; %next indexes %A in units of [3 x float], and the exit test compares the
; pre-increment %iv against %n.
; The assertions below expect a vector loop that advances the index by 8 per
; iteration and carries the recurrence as a <4 x ptr> phi, followed by a scalar
; remainder loop.
; NOTE(review): the function name suggests this covers a recurrence the
; vectorizer at some point tried to force scalar -- confirm against the commit.
356+ define void @test_for_tried_to_force_scalar (ptr noalias %A , ptr noalias %B , ptr noalias %C , i64 %n ) #0 {
357+ ; CHECK-LABEL: @test_for_tried_to_force_scalar(
358+ ; CHECK-NEXT: entry:
359+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1
360+ ; CHECK-NEXT: [[CONFLICT_RDX20:%.*]] = icmp ule i64 [[TMP0]], 8
361+ ; CHECK-NEXT: br i1 [[CONFLICT_RDX20]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
362+ ; CHECK: vector.ph:
363+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8
364+ ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
365+ ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 8, i64 [[N_MOD_VF]]
366+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[TMP4]]
367+ ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x ptr> poison, ptr [[A:%.*]], i32 3
368+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
369+ ; CHECK: vector.body:
370+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
371+ ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x ptr> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[VECTOR_BODY]] ]
372+ ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
373+ ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1
374+ ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 2
375+ ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 3
376+ ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 4
377+ ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 5
378+ ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 6
379+ ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 7
380+ ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP5]]
381+ ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP6]]
382+ ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP7]]
383+ ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP8]]
384+ ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP13]], i32 0
385+ ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x ptr> [[TMP17]], ptr [[TMP14]], i32 1
386+ ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x ptr> [[TMP18]], ptr [[TMP15]], i32 2
387+ ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x ptr> [[TMP19]], ptr [[TMP16]], i32 3
388+ ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP9]]
389+ ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP10]]
390+ ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP11]]
391+ ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP12]]
392+ ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP21]], i32 0
393+ ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x ptr> [[TMP25]], ptr [[TMP22]], i32 1
394+ ; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x ptr> [[TMP26]], ptr [[TMP23]], i32 2
395+ ; CHECK-NEXT: [[TMP28]] = insertelement <4 x ptr> [[TMP27]], ptr [[TMP24]], i32 3
396+ ; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x ptr> [[TMP20]], <4 x ptr> [[TMP28]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
397+ ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x float>, ptr [[TMP21]], align 4
398+ ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
399+ ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x float> [[STRIDED_VEC]], i32 3
400+ ; CHECK-NEXT: store float [[TMP30]], ptr [[C:%.*]], align 4
401+ ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 0
402+ ; CHECK-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP31]], align 4
403+ ; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 1
404+ ; CHECK-NEXT: [[TMP32:%.*]] = load float, ptr [[TMP33]], align 4
405+ ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 2
406+ ; CHECK-NEXT: [[TMP34:%.*]] = load float, ptr [[TMP35]], align 4
407+ ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 3
408+ ; CHECK-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP37]], align 4
409+ ; CHECK-NEXT: store float [[TMP36]], ptr [[B:%.*]], align 4
410+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
411+ ; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
412+ ; CHECK-NEXT: br i1 [[TMP39]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
413+ ; CHECK: middle.block:
414+ ; CHECK-NEXT: br label [[SCALAR_PH]]
415+ ; CHECK: scalar.ph:
416+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
417+ ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi ptr [ [[TMP24]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
418+ ; CHECK-NEXT: br label [[LOOP:%.*]]
419+ ; CHECK: loop:
420+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
421+ ; CHECK-NEXT: [[PREV:%.*]] = phi ptr [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[NEXT:%.*]], [[LOOP]] ]
422+ ; CHECK-NEXT: [[NEXT]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[IV]]
423+ ; CHECK-NEXT: [[TMP40:%.*]] = load float, ptr [[NEXT]], align 4
424+ ; CHECK-NEXT: store float [[TMP40]], ptr [[C]], align 4
425+ ; CHECK-NEXT: [[TMP41:%.*]] = load float, ptr [[PREV]], align 4
426+ ; CHECK-NEXT: store float [[TMP41]], ptr [[B]], align 4
427+ ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
428+ ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], [[N]]
429+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
430+ ; CHECK: exit:
431+ ; CHECK-NEXT: ret void
432+ ;
433+ entry:
434+ br label %loop
435+
436+ loop:
437+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ] ; induction variable, starts at 0
438+ %prev = phi ptr [ %A , %entry ], [ %next , %loop ] ; recurrence: %A on entry, then the previous iteration's %next
439+ %next = getelementptr nusw [3 x float ], ptr %A , i64 %iv ; address of the %iv-th [3 x float] element of %A
440+ %0 = load float , ptr %next , align 4
441+ store float %0 , ptr %C , align 4 ; store address %C does not change across iterations
442+ %1 = load float , ptr %prev , align 4 ; load through the address carried from the previous iteration
443+ store float %1 , ptr %B , align 4 ; store address %B does not change across iterations
444+ %iv.next = add nsw i64 %iv , 1
445+ %exitcond.not = icmp eq i64 %iv , %n ; compares %iv (not %iv.next), so the body also runs for %iv == %n
446+ br i1 %exitcond.not , label %exit , label %loop
447+
448+ exit:
449+ ret void
450+ }
451+
452+ attributes #0 = { "target-cpu" ="znver3" } ; attribute group #0: sets the "target-cpu" string attribute to znver3 for functions tagged #0
0 commit comments