@@ -57,6 +57,86 @@ loop.exit:
5757 ret void
5858}
5959
; Variant with getelementptr nusw.
;
; The scalar loop runs %iv over 0..3 and only loads from %input[%iv - 1] when
; %iv != 0; otherwise it stores 0.0. In the scalar loop the 'sub nuw nsw' and
; the 'getelementptr nusw' are therefore guarded by the branch on %i23.
;
; In the expected vector body below, the 'sub' and the address 'getelementptr'
; are computed from the first-lane index ([[TMP0]]) and only the load itself is
; masked. The check lines therefore expect a plain 'sub i64' and a plain
; 'getelementptr float' (no nuw/nsw, no nusw) feeding the masked load.
define void @drop_scalar_gep_nusw(ptr noalias nocapture readonly %input,
                                  ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_gep_nusw(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  ; Skip the load on the first iteration (%iv == 0).
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  ; Only reached when %iv != 0, so %iv - 1 does not wrap here.
  %i27 = sub nuw nsw i64 %iv, 1
  %i29 = getelementptr nusw float, ptr %input, i64 %i27
  %i30 = load float, ptr %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  ; 0.0 when the load was skipped, otherwise the loaded value.
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr nusw float, ptr %output, i64 %iv
  store float %i34, ptr %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  ; Exit once four iterations have executed.
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}
99+
; Variant with getelementptr nuw.
;
; Same loop shape as a conditional load of %input[%iv - 1] for %iv in 0..3:
; the load, its 'sub nuw nsw' index and its 'getelementptr nuw' address are
; only executed when %iv != 0; otherwise 0.0 is stored to %output[%iv].
;
; In the expected vector body below, the index 'sub' and the address
; 'getelementptr' are computed from the first-lane index ([[TMP0]]) and only
; the load is masked. The check lines therefore expect a plain 'sub i64' and a
; plain 'getelementptr float' (no nuw/nsw on either) feeding the masked load.
define void @drop_scalar_gep_nuw(ptr noalias nocapture readonly %input,
                                 ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_gep_nuw(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  ; Skip the load on the first iteration (%iv == 0).
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  ; Only reached when %iv != 0, so %iv - 1 does not wrap here.
  %i27 = sub nuw nsw i64 %iv, 1
  %i29 = getelementptr nuw float, ptr %input, i64 %i27
  %i30 = load float, ptr %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  ; 0.0 when the load was skipped, otherwise the loaded value.
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr nuw float, ptr %output, i64 %iv
  store float %i34, ptr %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  ; Exit once four iterations have executed.
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}
139+
60140; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load.
61141; In this case, 'sub' and 'getelementptr' are not guarded by the predicate.
62142define void @drop_nonpred_scalar_nuw_nsw (ptr noalias nocapture readonly %input ,
0 commit comments