@@ -4466,8 +4466,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IND-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
-; IND-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 32
-; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
+; IND-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30
+; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
 ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
@@ -4483,8 +4483,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; IND-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; IND-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; IND-NEXT: [[SEXT1:%.*]] = shl i64 [[INDVARS_IV]], 32
-; IND-NEXT: [[TMP3:%.*]] = ashr exact i64 [[SEXT1]], 32
-; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP3]]
+; IND-NEXT: [[TMP3:%.*]] = ashr exact i64 [[SEXT1]], 30
+; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
 ; IND-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4
 ; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
@@ -4507,8 +4507,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
 ; UNROLL-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
-; UNROLL-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 32
-; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
+; UNROLL-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30
+; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
 ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4
@@ -4526,8 +4526,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; UNROLL-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; UNROLL-NEXT: [[SEXT2:%.*]] = shl i64 [[INDVARS_IV]], 32
-; UNROLL-NEXT: [[TMP4:%.*]] = ashr exact i64 [[SEXT2]], 32
-; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
+; UNROLL-NEXT: [[TMP4:%.*]] = ashr exact i64 [[SEXT2]], 30
+; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
 ; UNROLL-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4
 ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
@@ -4599,8 +4599,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
 ; INTERLEAVE-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
-; INTERLEAVE-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 32
-; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
+; INTERLEAVE-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30
+; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4
@@ -4618,8 +4618,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; INTERLEAVE-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; INTERLEAVE-NEXT: [[SEXT2:%.*]] = shl i64 [[INDVARS_IV]], 32
-; INTERLEAVE-NEXT: [[TMP4:%.*]] = ashr exact i64 [[SEXT2]], 32
-; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
+; INTERLEAVE-NEXT: [[TMP4:%.*]] = ashr exact i64 [[SEXT2]], 30
+; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
 ; INTERLEAVE-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4
 ; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
@@ -6009,8 +6009,8 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; IND-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP0]]
 ; IND-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
-; IND-NEXT: [[TMP3:%.*]] = ashr exact i64 [[SEXT]], 32
-; IND-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP3]]
+; IND-NEXT: [[TMP3:%.*]] = ashr exact i64 [[SEXT]], 30
+; IND-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP3]]
 ; IND-NEXT: [[TMP5:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP2]]
 ; IND-NEXT: store <2 x i32> [[TMP5]], ptr [[TMP4]], align 4
 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -6044,8 +6044,8 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; UNROLL-NEXT: [[TMP3:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT4]], [[TMP0]]
 ; UNROLL-NEXT: [[TMP4:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT4]], [[TMP1]]
 ; UNROLL-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
-; UNROLL-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 32
-; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP5]]
+; UNROLL-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 30
+; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP5]]
 ; UNROLL-NEXT: [[TMP7:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP3]]
 ; UNROLL-NEXT: [[TMP8:%.*]] = add <2 x i32> [[STEP_ADD]], [[TMP4]]
 ; UNROLL-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP6]], i64 8
@@ -6139,8 +6139,8 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = mul nsw <4 x i32> [[BROADCAST_SPLAT4]], [[TMP0]]
 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[BROADCAST_SPLAT4]], [[TMP1]]
 ; INTERLEAVE-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
-; INTERLEAVE-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 32
-; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP5]]
+; INTERLEAVE-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 30
+; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP5]]
 ; INTERLEAVE-NEXT: [[TMP7:%.*]] = add <4 x i32> [[VEC_IND]], [[TMP3]]
 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = add <4 x i32> [[STEP_ADD]], [[TMP4]]
 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16
@@ -6166,8 +6166,8 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; INTERLEAVE-NEXT: [[IV_TRUNC]] = trunc i64 [[IV]] to i32
 ; INTERLEAVE-NEXT: [[SEXT5:%.*]] = shl i64 [[IV]], 32
-; INTERLEAVE-NEXT: [[TMP11:%.*]] = ashr exact i64 [[SEXT5]], 32
-; INTERLEAVE-NEXT: [[DST_GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP11]]
+; INTERLEAVE-NEXT: [[TMP11:%.*]] = ashr exact i64 [[SEXT5]], 30
+; INTERLEAVE-NEXT: [[DST_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]]
 ; INTERLEAVE-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[IV_TRUNC]]
 ; INTERLEAVE-NEXT: store i32 [[ADD]], ptr [[DST_GEP]], align 4
 ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100
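
Note on the updated checks (a reviewer's sketch, not part of the patch): a `getelementptr i32` scales its index by 4 bytes, so when these GEPs become byte-typed `i8` GEPs, the ×4 scaling must move into the index. Since the index here is `(%index << 32) ashr exact 32`, the multiply by 4 folds into the shift, yielding `ashr exact ..., 30`. A minimal standalone illustration of the two equivalent forms, with hypothetical function and value names:

```llvm
; Both functions compute &a[sext(trunc i32 %index)] for an array of i32.
define ptr @typed_gep(ptr %a, i64 %index) {
  %sext = shl i64 %index, 32
  %idx = ashr exact i64 %sext, 32                    ; sign-extend the low 32 bits
  %p = getelementptr inbounds i32, ptr %a, i64 %idx  ; GEP implicitly scales by 4 bytes
  ret ptr %p
}

define ptr @byte_gep(ptr %a, i64 %index) {
  %sext = shl i64 %index, 32
  %off = ashr exact i64 %sext, 30                    ; (sext >> 32) * 4, folded into the shift
  %p = getelementptr inbounds i8, ptr %a, i64 %off   ; byte-addressed GEP, no implicit scaling
  ret ptr %p
}
```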