@@ -4273,4 +4273,265 @@ define i32 @fold_zext_nneg_add_const_fail2(i8 %x) {
}

declare void @llvm.assume(i1)
declare i32 @llvm.ctlz.i32(i32, i1)

; Ceiling division by power-of-2: (x >> log2(N)) + ((x & (N-1)) != 0) -> (x + (N-1)) >> log2(N)
; This is only valid when x + (N-1) doesn't overflow

; Test with known range that prevents overflow: x in [0, 100), so x + 7 cannot wrap.
define i32 @ceil_div_by_8_known_range(i32 range(i32 0, 100) %x) {
; CHECK-LABEL: @ceil_div_by_8_known_range(
; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[X:%.*]], 7
; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[TMP1]], 3
; CHECK-NEXT:    ret i32 [[R]]
;
  %shr = lshr i32 %x, 3
  %and = and i32 %x, 7
  %cmp = icmp ne i32 %and, 0
  %ext = zext i1 %cmp to i32
  %r = add i32 %shr, %ext
  ret i32 %r
}

; Test with the exact IR from the original testcase: the ctlz range implies
; %sub is in [0, 32], so %sub + 7 cannot overflow.
define i32 @ceil_div_from_clz(i32 %v) {
; CHECK-LABEL: @ceil_div_from_clz(
; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[V:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = sub nuw nsw i32 39, [[CTLZ]]
; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[TMP1]], 3
; CHECK-NEXT:    ret i32 [[R]]
;
  %ctlz = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 %v, i1 false)
  %sub = sub nuw nsw i32 32, %ctlz
  %shr = lshr i32 %sub, 3
  %and = and i32 %sub, 7
  %cmp = icmp ne i32 %and, 0
  %ext = zext i1 %cmp to i32
  %r = add nuw nsw i32 %shr, %ext
  ret i32 %r
}

; Vector version with known range
define <2 x i32> @ceil_div_by_8_vec_range(<2 x i32> range(i32 0, 1000) %x) {
; CHECK-LABEL: @ceil_div_by_8_vec_range(
; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw <2 x i32> [[X:%.*]], splat (i32 7)
; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 3)
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %shr = lshr <2 x i32> %x, <i32 3, i32 3>
  %and = and <2 x i32> %x, <i32 7, i32 7>
  %cmp = icmp ne <2 x i32> %and, <i32 0, i32 0>
  %ext = zext <2 x i1> %cmp to <2 x i32>
  %r = add <2 x i32> %shr, %ext
  ret <2 x i32> %r
}

; Ceiling division by 16 with known range (shift 4, mask 15) on a narrower type.
define i16 @ceil_div_by_16_i16(i16 range(i16 0, 1000) %x) {
; CHECK-LABEL: @ceil_div_by_16_i16(
; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i16 [[X:%.*]], 15
; CHECK-NEXT:    [[R:%.*]] = lshr i16 [[TMP1]], 4
; CHECK-NEXT:    ret i16 [[R]]
;
  %shr = lshr i16 %x, 4
  %and = and i16 %x, 15
  %cmp = icmp ne i16 %and, 0
  %ext = zext i1 %cmp to i16
  %r = add i16 %shr, %ext
  ret i16 %r
}

; Negative test: no overflow guarantee - should NOT optimize
define i32 @ceil_div_by_8_no_overflow_info(i32 %x) {
; CHECK-LABEL: @ceil_div_by_8_no_overflow_info(
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], 7
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[AND]], 0
; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[CMP]] to i32
; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %shr = lshr i32 %x, 3
  %and = and i32 %x, 7
  %cmp = icmp ne i32 %and, 0
  %ext = zext i1 %cmp to i32
  %r = add i32 %shr, %ext
  ret i32 %r
}
4361+
4362+ ; Negative test: nuw on final add doesn't help
4363+ define i32 @ceil_div_by_8_only_nuw_on_add (i32 %x ) {
4364+ ; CHECK-LABEL: @ceil_div_by_8_only_nuw_on_add(
4365+ ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
4366+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
4367+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
4368+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
4369+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
4370+ ; CHECK-NEXT: ret i32 [[R]]
4371+ ;
4372+ %shr = lshr i32 %x , 3
4373+ %and = and i32 %x , 7
4374+ %cmp = icmp ne i32 %and , 0
4375+ %ext = zext i1 %cmp to i32
4376+ %r = add nuw i32 %shr , %ext ; nuw here doesn't prove x+7 won't overflow
4377+ ret i32 %r
4378+ }

; Negative test: wrong mask
define i32 @ceil_div_wrong_mask(i32 range(i32 0, 100) %x) {
; CHECK-LABEL: @ceil_div_wrong_mask(
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], 6
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[AND]], 0
; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[CMP]] to i32
; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %shr = lshr i32 %x, 3
  %and = and i32 %x, 6 ; Wrong mask: should be 7
  %cmp = icmp ne i32 %and, 0
  %ext = zext i1 %cmp to i32
  %r = add i32 %shr, %ext
  ret i32 %r
}

; Negative test: wrong shift amount
define i32 @ceil_div_wrong_shift(i32 range(i32 0, 100) %x) {
; CHECK-LABEL: @ceil_div_wrong_shift(
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X:%.*]], 4
; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], 7
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[AND]], 0
; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[CMP]] to i32
; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %shr = lshr i32 %x, 4 ; Shift by 4, but mask is 7 (should be 15)
  %and = and i32 %x, 7
  %cmp = icmp ne i32 %and, 0
  %ext = zext i1 %cmp to i32
  %r = add i32 %shr, %ext
  ret i32 %r
}

; Negative test: wrong comparison
define i32 @ceil_div_wrong_cmp(i32 range(i32 0, 100) %x) {
; CHECK-LABEL: @ceil_div_wrong_cmp(
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], 7
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[CMP]] to i32
; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %shr = lshr i32 %x, 3
  %and = and i32 %x, 7
  %cmp = icmp eq i32 %and, 0 ; Wrong: should be ne
  %ext = zext i1 %cmp to i32
  %r = add i32 %shr, %ext
  ret i32 %r
}

; Multi-use test: all intermediate values have uses, so the fold (which would
; leave the intermediates dead) is not profitable and must not fire.
define i32 @ceil_div_multi_use(i32 range(i32 0, 100) %x) {
; CHECK-LABEL: @ceil_div_multi_use(
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
; CHECK-NEXT:    call void @use_i32(i32 [[SHR]])
; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], 7
; CHECK-NEXT:    call void @use_i32(i32 [[AND]])
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[AND]], 0
; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[CMP]] to i32
; CHECK-NEXT:    call void @use_i32(i32 [[EXT]])
; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %shr = lshr i32 %x, 3
  call void @use_i32(i32 %shr)
  %and = and i32 %x, 7
  call void @use_i32(i32 %and)
  %cmp = icmp ne i32 %and, 0
  %ext = zext i1 %cmp to i32
  call void @use_i32(i32 %ext)
  %r = add i32 %shr, %ext
  ret i32 %r
}
4457+
4458+ ; Commuted test: add operands are swapped
4459+ define i32 @ceil_div_commuted (i32 range(i32 0 , 100 ) %x ) {
4460+ ; CHECK-LABEL: @ceil_div_commuted(
4461+ ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[X:%.*]], 7
4462+ ; CHECK-NEXT: [[R:%.*]] = lshr i32 [[TMP1]], 3
4463+ ; CHECK-NEXT: ret i32 [[R]]
4464+ ;
4465+ %shr = lshr i32 %x , 3
4466+ %and = and i32 %x , 7
4467+ %cmp = icmp ne i32 %and , 0
4468+ %ext = zext i1 %cmp to i32
4469+ %r = add i32 %ext , %shr ; Operands swapped
4470+ ret i32 %r
4471+ }
4472+
4473+ ; Commuted with multi-use
4474+ define i32 @ceil_div_commuted_multi_use (i32 range(i32 0 , 100 ) %x ) {
4475+ ; CHECK-LABEL: @ceil_div_commuted_multi_use(
4476+ ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
4477+ ; CHECK-NEXT: call void @use_i32(i32 [[SHR]])
4478+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
4479+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
4480+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
4481+ ; CHECK-NEXT: call void @use_i32(i32 [[EXT]])
4482+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
4483+ ; CHECK-NEXT: ret i32 [[R]]
4484+ ;
4485+ %shr = lshr i32 %x , 3
4486+ call void @use_i32 (i32 %shr )
4487+ %and = and i32 %x , 7
4488+ %cmp = icmp ne i32 %and , 0
4489+ %ext = zext i1 %cmp to i32
4490+ call void @use_i32 (i32 %ext )
4491+ %r = add i32 %ext , %shr ; Operands swapped
4492+ ret i32 %r
4493+ }
4494+
4495+ ; Multi-use test where only zext has multiple uses - should still optimize
4496+ define i32 @ceil_div_zext_multi_use (i32 range(i32 0 , 100 ) %x ) {
4497+ ; CHECK-LABEL: @ceil_div_zext_multi_use(
4498+ ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 7
4499+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
4500+ ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
4501+ ; CHECK-NEXT: call void @use_i32(i32 [[EXT]])
4502+ ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[X]], 7
4503+ ; CHECK-NEXT: [[R:%.*]] = lshr i32 [[TMP1]], 3
4504+ ; CHECK-NEXT: ret i32 [[R]]
4505+ ;
4506+ %shr = lshr i32 %x , 3
4507+ %and = and i32 %x , 7
4508+ %cmp = icmp ne i32 %and , 0
4509+ %ext = zext i1 %cmp to i32
4510+ call void @use_i32 (i32 %ext )
4511+ %r = add i32 %shr , %ext
4512+ ret i32 %r
4513+ }
4514+
4515+ ; Multi-use with vector type
4516+ define <2 x i32 > @ceil_div_vec_multi_use (<2 x i32 > range(i32 0 , 1000 ) %x ) {
4517+ ; CHECK-LABEL: @ceil_div_vec_multi_use(
4518+ ; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 3)
4519+ ; CHECK-NEXT: call void @use_vec(<2 x i32> [[SHR]])
4520+ ; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X]], splat (i32 7)
4521+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], zeroinitializer
4522+ ; CHECK-NEXT: [[EXT:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32>
4523+ ; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i32> [[SHR]], [[EXT]]
4524+ ; CHECK-NEXT: ret <2 x i32> [[R]]
4525+ ;
4526+ %shr = lshr <2 x i32 > %x , <i32 3 , i32 3 >
4527+ call void @use_vec (<2 x i32 > %shr )
4528+ %and = and <2 x i32 > %x , <i32 7 , i32 7 >
4529+ %cmp = icmp ne <2 x i32 > %and , <i32 0 , i32 0 >
4530+ %ext = zext <2 x i1 > %cmp to <2 x i32 >
4531+ %r = add <2 x i32 > %shr , %ext
4532+ ret <2 x i32 > %r
4533+ }

declare void @use_i32(i32)
declare void @use_vec(<2 x i32>)
declare void @fake_func(i32)