@@ -433,6 +433,81 @@ exit: ; preds = %loop.latch
433433 ret void
434434}
435435
; Scalar input: for iv = 0..4 inclusive, the loop stores (trunc(iv) | 1) as an
; i32 to %gep.dst[iv].
;
; The COMMON check lines expect a 4-lane vector body with no scalar remainder
; loop (the middle block branches straight to the exit):
;   * a <4 x i8> induction compared `ule 4` produces the per-lane mask;
;   * each lane gets its own predicated block (pred.store.if/continue) that
;     contains scalar copies of the index add, the truncated-index add, the
;     getelementptr, the `or ..., 1` and the store;
;   * the vector loop exits once the i64 index reaches 8, i.e. after two
;     vector iterations covering lanes 0..7, of which only 0..4 are active.
; NOTE(review): going by the function name, the point of the test is that the
; trunc/or feeding the store are kept scalar inside the predicated blocks --
; confirm against the commit that introduced it.
436+ define void @forced_scalar_instr (ptr %gep.dst ) {
437+ ; COMMON-LABEL: define void @forced_scalar_instr(
438+ ; COMMON-SAME: ptr [[GEP_DST:%.*]]) {
439+ ; COMMON-NEXT: [[ENTRY:.*:]]
440+ ; COMMON-NEXT: br label %[[VECTOR_PH:.*]]
441+ ; COMMON: [[VECTOR_PH]]:
442+ ; COMMON-NEXT: br label %[[VECTOR_BODY:.*]]
443+ ; COMMON: [[VECTOR_BODY]]:
444+ ; COMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
445+ ; COMMON-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
446+ ; COMMON-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
447+ ; COMMON-NEXT: [[TMP1:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 4)
448+ ; COMMON-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
449+ ; COMMON-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
450+ ; COMMON: [[PRED_STORE_IF]]:
451+ ; COMMON-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
452+ ; COMMON-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 0
453+ ; COMMON-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP3]]
454+ ; COMMON-NEXT: [[TMP6:%.*]] = or i32 [[TMP4]], 1
455+ ; COMMON-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4
456+ ; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE]]
457+ ; COMMON: [[PRED_STORE_CONTINUE]]:
458+ ; COMMON-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
459+ ; COMMON-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
460+ ; COMMON: [[PRED_STORE_IF1]]:
461+ ; COMMON-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1
462+ ; COMMON-NEXT: [[TMP9:%.*]] = add i32 [[TMP0]], 1
463+ ; COMMON-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP8]]
464+ ; COMMON-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], 1
465+ ; COMMON-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4
466+ ; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE2]]
467+ ; COMMON: [[PRED_STORE_CONTINUE2]]:
468+ ; COMMON-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
469+ ; COMMON-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
470+ ; COMMON: [[PRED_STORE_IF3]]:
471+ ; COMMON-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 2
472+ ; COMMON-NEXT: [[TMP14:%.*]] = add i32 [[TMP0]], 2
473+ ; COMMON-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP13]]
474+ ; COMMON-NEXT: [[TMP16:%.*]] = or i32 [[TMP14]], 1
475+ ; COMMON-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4
476+ ; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE4]]
477+ ; COMMON: [[PRED_STORE_CONTINUE4]]:
478+ ; COMMON-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
479+ ; COMMON-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
480+ ; COMMON: [[PRED_STORE_IF5]]:
481+ ; COMMON-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 3
482+ ; COMMON-NEXT: [[TMP19:%.*]] = add i32 [[TMP0]], 3
483+ ; COMMON-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP18]]
484+ ; COMMON-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], 1
485+ ; COMMON-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4
486+ ; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE6]]
487+ ; COMMON: [[PRED_STORE_CONTINUE6]]:
488+ ; COMMON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
489+ ; COMMON-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
490+ ; COMMON-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8
491+ ; COMMON-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
492+ ; COMMON: [[MIDDLE_BLOCK]]:
493+ ; COMMON-NEXT: br label %[[EXIT:.*]]
494+ entry:
495+ br label %loop
496+
497+ loop:
498+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
499+ %gep = getelementptr i32 , ptr %gep.dst , i64 %iv
500+ %t = trunc i64 %iv to i32
501+ %o = or i32 %t , 1
502+ store i32 %o , ptr %gep , align 4
503+ %iv.next = add i64 %iv , 1
; The exit test reads the pre-increment %iv (not %iv.next), so the body also
; executes for iv == 4: five iterations in total.
504+ %ec = icmp eq i64 %iv , 4
505+ br i1 %ec , label %exit , label %loop
506+
507+ exit:
508+ ret void
509+ }
510+
; Attribute group #0: sets the "target-features" string attribute to
; "+neon,+sve" and restricts vscale to the range [1, 16].
; NOTE(review): the functions that reference #0 are outside this section.
436511attributes #0 = { "target-features" ="+neon,+sve" vscale_range(1 ,16 ) }
437512
; Declaration of the llvm.assume intrinsic (takes a single `i1 noundef`
; operand); its call sites are not visible in this section.
438513declare void @llvm.assume (i1 noundef)
0 commit comments