@@ -380,6 +380,84 @@ for.end:
380380 ret void
381381}
382382
; Input: a scalar loop over an i64 induction variable %iv that starts at 0.
; Each iteration stores (trunc %iv to i32) | 1 as an i32 to %gep.dst[%iv].
; The exit test compares the pre-increment %iv against 4, so the stores cover
; iv = 0..4 (five iterations in total).
;
; Expected output (the assertion lines below): a vector loop of width 4 with
; no scalar remainder loop (the middle block branches straight to the exit).
; Lanes are masked by `icmp ule <4 x i8> vec.ind, splat (i8 4)` and the loop
; runs until the i64 index reaches 8. Each lane gets its own conditional block
; in which the address, the `or` and the store are emitted as scalar
; instructions; the i32 value is built from one scalar trunc of the index plus
; the lane offset.
; NOTE(review): going by the function name, this presumably guards the
; handling of instructions that are forced to stay scalar - confirm against
; the commit that introduced the test.
383+ define void @forced_scalar_instr (ptr %gep.dst ) {
384+ ; COMMON-LABEL: define void @forced_scalar_instr(
385+ ; COMMON-SAME: ptr [[GEP_DST:%.*]]) {
386+ ; COMMON-NEXT: [[ENTRY:.*:]]
387+ ; COMMON-NEXT: br label %[[VECTOR_PH:.*]]
388+ ; COMMON: [[VECTOR_PH]]:
389+ ; COMMON-NEXT: br label %[[VECTOR_BODY:.*]]
390+ ; COMMON: [[VECTOR_BODY]]:
391+ ; COMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
392+ ; COMMON-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
393+ ; COMMON-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
394+ ; COMMON-NEXT: [[TMP1:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 4)
395+ ; COMMON-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
396+ ; COMMON-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
397+ ; COMMON: [[PRED_STORE_IF]]:
398+ ; COMMON-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
399+ ; COMMON-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 0
400+ ; COMMON-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP3]]
401+ ; COMMON-NEXT: [[TMP6:%.*]] = or i32 [[TMP4]], 1
402+ ; COMMON-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4
403+ ; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE]]
404+ ; COMMON: [[PRED_STORE_CONTINUE]]:
405+ ; COMMON-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
406+ ; COMMON-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
407+ ; COMMON: [[PRED_STORE_IF1]]:
408+ ; COMMON-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1
409+ ; COMMON-NEXT: [[TMP9:%.*]] = add i32 [[TMP0]], 1
410+ ; COMMON-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP8]]
411+ ; COMMON-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], 1
412+ ; COMMON-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4
413+ ; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE2]]
414+ ; COMMON: [[PRED_STORE_CONTINUE2]]:
415+ ; COMMON-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
416+ ; COMMON-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
417+ ; COMMON: [[PRED_STORE_IF3]]:
418+ ; COMMON-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 2
419+ ; COMMON-NEXT: [[TMP14:%.*]] = add i32 [[TMP0]], 2
420+ ; COMMON-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP13]]
421+ ; COMMON-NEXT: [[TMP16:%.*]] = or i32 [[TMP14]], 1
422+ ; COMMON-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4
423+ ; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE4]]
424+ ; COMMON: [[PRED_STORE_CONTINUE4]]:
425+ ; COMMON-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
426+ ; COMMON-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
427+ ; COMMON: [[PRED_STORE_IF5]]:
428+ ; COMMON-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 3
429+ ; COMMON-NEXT: [[TMP19:%.*]] = add i32 [[TMP0]], 3
430+ ; COMMON-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP18]]
431+ ; COMMON-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], 1
432+ ; COMMON-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4
433+ ; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE6]]
434+ ; COMMON: [[PRED_STORE_CONTINUE6]]:
435+ ; COMMON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
436+ ; COMMON-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
437+ ; COMMON-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8
438+ ; COMMON-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
439+ ; COMMON: [[MIDDLE_BLOCK]]:
440+ ; COMMON-NEXT: br label %[[EXIT:.*]]
441+ ; COMMON: [[EXIT]]:
442+ ; COMMON-NEXT: ret void
443+ ;
444+ entry:
445+ br label %loop
446+
447+ loop:
448+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
449+ %gep = getelementptr i32 , ptr %gep.dst , i64 %iv
; The stored i32 value is derived from the i64 induction variable itself.
450+ %t = trunc i64 %iv to i32
451+ %o = or i32 %t , 1
452+ store i32 %o , ptr %gep , align 4
453+ %iv.next = add i64 %iv , 1
; The exit test reads %iv, not %iv.next, so the iteration with %iv == 4 still
; performs its store before the loop is left.
454+ %ec = icmp eq i64 %iv , 4
455+ br i1 %ec , label %exit , label %loop
456+
457+ exit:
458+ ret void
459+ }
460+
; Attribute group #0: sets the "target-features" string to "+neon,+sve" and
; applies vscale_range(1, 16). Nothing in the visible part of the file uses
; it; presumably functions earlier in the file reference #0 - confirm there.
383461attributes #0 = { "target-features" ="+neon,+sve" vscale_range(1 ,16 ) }
384462
; Declaration of the llvm.assume intrinsic (takes a single i1 noundef operand).
385463declare void @llvm.assume (i1 noundef)
0 commit comments