@@ -4273,4 +4273,161 @@ define i32 @fold_zext_nneg_add_const_fail2(i8 %x) {
42734273}
42744274
42754275declare void @llvm.assume (i1 )
4276+ declare i32 @llvm.ctlz.i32 (i32 , i1 ) ; count-leading-zeros intrinsic, called by @ceil_div_from_clz
4277+
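4278+ ; Ceiling division by a power of 2 (unsigned x, N = 2^k): (x >> log2(N)) + ((x & (N-1)) != 0) -> (x + (N-1)) >> log2(N)
4279+ ; This is only valid when x + (N-1) cannot wrap (unsigned overflow); otherwise the high bits of the sum are lost before the shift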
4280+
4281+ ; Positive test: %x is known to be in [0, 100), so x + 7 cannot wrap and the fold to (x + 7) >> 3 is expected
4282+ define noundef range(i32 0 , 100 ) i32 @ceil_div_by_8_known_range (i32 noundef range(i32 0 , 100 ) %x ) {
4283+ ; CHECK-LABEL: @ceil_div_by_8_known_range(
4284+ ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[X:%.*]], 7
4285+ ; CHECK-NEXT: [[R:%.*]] = lshr i32 [[TMP1]], 3
4286+ ; CHECK-NEXT: ret i32 [[R]]
4287+ ;
4288+ %shr = lshr i32 %x , 3 ; floor(x / 8)
4289+ %and = and i32 %x , 7 ; low three bits, i.e. x mod 8
4290+ %cmp = icmp ne i32 %and , 0 ; true iff x is not a multiple of 8
4291+ %ext = zext i1 %cmp to i32 ; 0 or 1
4292+ %r = add i32 %shr , %ext ; floor(x / 8) + (remainder != 0) = ceil(x / 8)
4293+ ret i32 %r
4294+ }
4295+
4296+ ; Test with the exact IR from the original testcase: ceil((32 - ctlz(v)) / 8). %sub is in [0, 32], so %sub + 7 cannot wrap; the added 7 is expected to merge into the subtraction constant (39 - ctlz)
4297+ define noundef range(i32 0 , 6 ) i32 @ceil_div_from_clz (i32 noundef %v ) {
4298+ ; CHECK-LABEL: @ceil_div_from_clz(
4299+ ; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[V:%.*]], i1 false)
4300+ ; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i32 39, [[CTLZ]]
4301+ ; CHECK-NEXT: [[R:%.*]] = lshr i32 [[TMP1]], 3
4302+ ; CHECK-NEXT: ret i32 [[R]]
4303+ ;
4304+ %ctlz = tail call range(i32 0 , 33 ) i32 @llvm.ctlz.i32 (i32 %v , i1 false ) ; result annotated as [0, 33), i.e. at most 32
4305+ %sub = sub nuw nsw i32 32 , %ctlz ; 32 - ctlz, in [0, 32]
4306+ %shr = lshr i32 %sub , 3 ; floor(%sub / 8)
4307+ %and = and i32 %sub , 7 ; %sub mod 8
4308+ %cmp = icmp ne i32 %and , 0 ; true iff %sub is not a multiple of 8
4309+ %ext = zext i1 %cmp to i32 ; 0 or 1
4310+ %r = add nuw nsw i32 %shr , %ext ; ceil(%sub / 8)
4311+ ret i32 %r
4312+ }
4313+
4314+ ; Vector version with known range: %x is annotated range(i32 0, 1000), so the fold is expected with splat constants (add 7, then lshr 3)
4315+ define <2 x i32 > @ceil_div_by_8_vec_range (<2 x i32 > range(i32 0 , 1000 ) %x ) {
4316+ ; CHECK-LABEL: @ceil_div_by_8_vec_range(
4317+ ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <2 x i32> [[X:%.*]], splat (i32 7)
4318+ ; CHECK-NEXT: [[R:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 3)
4319+ ; CHECK-NEXT: ret <2 x i32> [[R]]
4320+ ;
4321+ %shr = lshr <2 x i32 > %x , <i32 3 , i32 3 > ; per-lane floor(x / 8)
4322+ %and = and <2 x i32 > %x , <i32 7 , i32 7 > ; per-lane x mod 8
4323+ %cmp = icmp ne <2 x i32 > %and , <i32 0 , i32 0 > ; per-lane "not a multiple of 8"
4324+ %ext = zext <2 x i1 > %cmp to <2 x i32 > ; per-lane 0 or 1
4325+ %r = add <2 x i32 > %shr , %ext ; per-lane ceil(x / 8)
4326+ ret <2 x i32 > %r
4327+ }
4328+
4329+ ; Ceiling division by 16 on i16 with known range: %x is in [0, 1000), so x + 15 cannot wrap and the fold to (x + 15) >> 4 is expected
4330+ define i16 @ceil_div_by_16_i16 (i16 range(i16 0 , 1000 ) %x ) {
4331+ ; CHECK-LABEL: @ceil_div_by_16_i16(
4332+ ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i16 [[X:%.*]], 15
4333+ ; CHECK-NEXT: [[R:%.*]] = lshr i16 [[TMP1]], 4
4334+ ; CHECK-NEXT: ret i16 [[R]]
4335+ ;
4336+ %shr = lshr i16 %x , 4 ; floor(x / 16)
4337+ %and = and i16 %x , 15 ; low four bits, i.e. x mod 16
4338+ %cmp = icmp ne i16 %and , 0 ; true iff x is not a multiple of 16
4339+ %ext = zext i1 %cmp to i16 ; 0 or 1
4340+ %r = add i16 %shr , %ext ; ceil(x / 16)
4341+ ret i16 %r
4342+ }
4343+
4344+ ; Negative test: no overflow guarantee - should NOT optimize. %x is unconstrained, so x + 7 may wrap (x = -1 gives 0x20000000 here but (x + 7) >> 3 would give 0); only inferred nuw/nsw flags on the final add are expected
4345+ define i32 @ceil_div_by_8_no_overflow_info (i32 %x ) {
4346+ ; CHECK-LABEL: @ceil_div_by_8_no_overflow_info(
4347+ ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
4348+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
4349+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
4350+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
4351+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
4352+ ; CHECK-NEXT: ret i32 [[R]]
4353+ ;
4354+ %shr = lshr i32 %x , 3 ; floor(x / 8), at most 0x1FFFFFFF
4355+ %and = and i32 %x , 7 ; x mod 8
4356+ %cmp = icmp ne i32 %and , 0 ; true iff x is not a multiple of 8
4357+ %ext = zext i1 %cmp to i32 ; 0 or 1
4358+ %r = add i32 %shr , %ext ; this add itself can never wrap, unlike x + 7
4359+ ret i32 %r
4360+ }
4361+
4362+ ; Negative test: nuw on the final add doesn't help. It only states that %shr + %ext does not wrap, which says nothing about x + 7, so the pattern must be left alone
4363+ define i32 @ceil_div_by_8_only_nuw_on_add (i32 %x ) {
4364+ ; CHECK-LABEL: @ceil_div_by_8_only_nuw_on_add(
4365+ ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
4366+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
4367+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
4368+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
4369+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
4370+ ; CHECK-NEXT: ret i32 [[R]]
4371+ ;
4372+ %shr = lshr i32 %x , 3 ; floor(x / 8)
4373+ %and = and i32 %x , 7 ; x mod 8
4374+ %cmp = icmp ne i32 %and , 0 ; true iff x is not a multiple of 8
4375+ %ext = zext i1 %cmp to i32 ; 0 or 1
4376+ %r = add nuw i32 %shr , %ext ; nuw here doesn't prove x+7 won't overflow
4377+ ret i32 %r
4378+ }
4379+
4380+ ; Negative test: wrong mask. 6 is not 2^3 - 1, so (x & 6) != 0 does not mean "x is not a multiple of 8" (x = 1 is missed); no fold is expected even though %x has a known range
4381+ define i32 @ceil_div_wrong_mask (i32 range(i32 0 , 100 ) %x ) {
4382+ ; CHECK-LABEL: @ceil_div_wrong_mask(
4383+ ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
4384+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 6
4385+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
4386+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
4387+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
4388+ ; CHECK-NEXT: ret i32 [[R]]
4389+ ;
4390+ %shr = lshr i32 %x , 3 ; floor(x / 8)
4391+ %and = and i32 %x , 6 ; Wrong mask: should be 7
4392+ %cmp = icmp ne i32 %and , 0 ; tests bits 1 and 2 only; bit 0 is ignored
4393+ %ext = zext i1 %cmp to i32 ; 0 or 1
4394+ %r = add i32 %shr , %ext ; not ceil(x / 8) when x mod 8 == 1
4395+ ret i32 %r
4396+ }
4397+
4398+ ; Negative test: wrong shift amount. The shift (4) implies a divisor of 16 while the mask (7) implies a divisor of 8, so this is not a ceiling division and no fold is expected
4399+ define i32 @ceil_div_wrong_shift (i32 range(i32 0 , 100 ) %x ) {
4400+ ; CHECK-LABEL: @ceil_div_wrong_shift(
4401+ ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 4
4402+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
4403+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
4404+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
4405+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
4406+ ; CHECK-NEXT: ret i32 [[R]]
4407+ ;
4408+ %shr = lshr i32 %x , 4 ; Shift by 4, but mask is 7 (should be 15)
4409+ %and = and i32 %x , 7 ; x mod 8, which does not match the divisor of 16 above
4410+ %cmp = icmp ne i32 %and , 0 ; true iff x is not a multiple of 8
4411+ %ext = zext i1 %cmp to i32 ; 0 or 1
4412+ %r = add i32 %shr , %ext ; floor(x / 16) + (x mod 8 != 0)
4413+ ret i32 %r
4414+ }
4415+
4416+ ; Negative test: wrong comparison. With eq instead of ne, 1 is added exactly when x IS a multiple of 8, so this is not a ceiling division and no fold is expected
4417+ define i32 @ceil_div_wrong_cmp (i32 range(i32 0 , 100 ) %x ) {
4418+ ; CHECK-LABEL: @ceil_div_wrong_cmp(
4419+ ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
4420+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
4421+ ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
4422+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
4423+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
4424+ ; CHECK-NEXT: ret i32 [[R]]
4425+ ;
4426+ %shr = lshr i32 %x , 3 ; floor(x / 8)
4427+ %and = and i32 %x , 7 ; x mod 8
4428+ %cmp = icmp eq i32 %and , 0 ; Wrong: should be ne
4429+ %ext = zext i1 %cmp to i32 ; 1 when x is a multiple of 8, else 0
4430+ %r = add i32 %shr , %ext ; floor(x / 8) + (x mod 8 == 0)
4431+ ret i32 %r
4432+ }
42764433declare void @fake_func (i32 )
0 commit comments