diff --git a/llvm/test/CodeGen/AMDGPU/add64-low-32-bits-known-zero.ll b/llvm/test/CodeGen/AMDGPU/add64-low-32-bits-known-zero.ll new file mode 100644 index 0000000000000..981e33f89d956 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/add64-low-32-bits-known-zero.ll @@ -0,0 +1,213 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s + +; Reduce a 64-bit add by a constant if we know the low 32-bits are all +; zero. + +; add i64:x, K if computeTrailingZeros(K) >= 32 +; => build_pair (add x.hi, K.hi), x.lo + +define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_0(i64 inreg %reg) { +; GFX9-LABEL: s_add_i64_const_low_bits_known0_0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s0, 0 +; GFX9-NEXT: s_addc_u32 s1, s1, 0x40000 +; GFX9-NEXT: ; return to shader part epilog + %add = add i64 %reg, 1125899906842624 ; (1 << 50) + ret i64 %add +} + +define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_1(i64 inreg %reg) { +; GFX9-LABEL: s_add_i64_const_low_bits_known0_1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s0, 0 +; GFX9-NEXT: s_addc_u32 s1, s1, 1 +; GFX9-NEXT: ; return to shader part epilog + %add = add i64 %reg, 4294967296 ; (1 << 32) + ret i64 %add +} + +define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_2(i64 inreg %reg) { +; GFX9-LABEL: s_add_i64_const_low_bits_known0_2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s0, 0 +; GFX9-NEXT: s_addc_u32 s1, s1, 2 +; GFX9-NEXT: ; return to shader part epilog + %add = add i64 %reg, 8589934592 ; (1 << 33) + ret i64 %add +} + +define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_3(i64 inreg %reg) { +; GFX9-LABEL: s_add_i64_const_low_bits_known0_3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s0, 0 +; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000 +; GFX9-NEXT: ; return to shader part epilog + %add = add i64 %reg, -9223372036854775808 ; (1 << 63) + ret i64 %add +} + +define amdgpu_ps i64 
@s_add_i64_const_low_bits_known0_4(i64 inreg %reg) { +; GFX9-LABEL: s_add_i64_const_low_bits_known0_4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s0, 0 +; GFX9-NEXT: s_addc_u32 s1, s1, -1 +; GFX9-NEXT: ; return to shader part epilog + %add = add i64 %reg, -4294967296 ; 0xffffffff00000000 + ret i64 %add +} + +define i64 @v_add_i64_const_low_bits_known0_0(i64 %reg) { +; GFX9-LABEL: v_add_i64_const_low_bits_known0_0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v2, 0x40000 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %add = add i64 %reg, 1125899906842624 ; (1 << 50) + ret i64 %add +} + +define i64 @v_add_i64_const_low_bits_known0_1(i64 %reg) { +; GFX9-LABEL: v_add_i64_const_low_bits_known0_1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %add = add i64 %reg, 4294967296 ; (1 << 32) + ret i64 %add +} + +define i64 @v_add_i64_const_low_bits_known0_2(i64 %reg) { +; GFX9-LABEL: v_add_i64_const_low_bits_known0_2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %add = add i64 %reg, 8589934592 ; (1 << 33) + ret i64 %add +} + +define i64 @v_add_i64_const_low_bits_known0_3(i64 %reg) { +; GFX9-LABEL: v_add_i64_const_low_bits_known0_3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %add = add i64 %reg, -9223372036854775808 ; (1 << 63) + ret i64 %add +} + +define i64 @v_add_i64_const_low_bits_known0_4(i64 %reg) { +; GFX9-LABEL: 
v_add_i64_const_low_bits_known0_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %add = add i64 %reg, -4294967296 ; 0xffffffff00000000
+ ret i64 %add
+}
+
+define amdgpu_ps i64 @s_add_i64_const_high_bits_known0_0(i64 inreg %reg) {
+; GFX9-LABEL: s_add_i64_const_high_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_add_u32 s0, s0, -1
+; GFX9-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-NEXT: ; return to shader part epilog
+ %add = add i64 %reg, 4294967295 ; (1 << 32) - 1
+ ret i64 %add
+}
+
+define i64 @v_add_i64_const_high_bits_known0_0(i64 %reg) {
+; GFX9-LABEL: v_add_i64_const_high_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %add = add i64 %reg, 4294967295 ; (1 << 32) - 1
+ ret i64 %add
+}
+
+define <2 x i64> @v_add_v2i64_splat_const_low_bits_known0_0(<2 x i64> %reg) {
+; GFX9-LABEL: v_add_v2i64_splat_const_low_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 0, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 1, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %add = add <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
+ ret <2 x i64> %add
+}
+
+define <2 x i64> @v_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> %reg) {
+; GFX9-LABEL: v_add_v2i64_nonsplat_const_low_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 0, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 2, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %add = add <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
+ ret <2 x i64> %add
+}
+
+define amdgpu_ps <2 x i64> @s_add_v2i64_splat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
+; GFX9-LABEL: s_add_v2i64_splat_const_low_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_add_u32 s0, s0, 0
+; GFX9-NEXT: s_addc_u32 s1, s1, 1
+; GFX9-NEXT: s_add_u32 s2, s2, 0
+; GFX9-NEXT: s_addc_u32 s3, s3, 1
+; GFX9-NEXT: ; return to shader part epilog
+ %add = add <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
+ ret <2 x i64> %add
+}
+
+define amdgpu_ps <2 x i64> @s_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
+; GFX9-LABEL: s_add_v2i64_nonsplat_const_low_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_add_u32 s0, s0, 0
+; GFX9-NEXT: s_addc_u32 s1, s1, 1
+; GFX9-NEXT: s_add_u32 s2, s2, 0
+; GFX9-NEXT: s_addc_u32 s3, s3, 2
+; GFX9-NEXT: ; return to shader part epilog
+ %add = add <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
+ ret <2 x i64> %add
+}
+
+; We could reduce this to use a 32-bit add if we use computeKnownBits
+define i64 @v_add_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
+; GFX9-LABEL: v_add_i64_variable_high_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset.hi32 = zext i32 %offset.hi32 to i64
+ %in.high.bits = shl i64 %zext.offset.hi32, 32
+ %add = add i64 %reg, %in.high.bits
+ ret i64 %add
+}
+
+; We could reduce this to use a 32-bit add if we use computeKnownBits
+define amdgpu_ps i64 @s_add_i64_variable_high_bits_known0_0(i64 inreg %reg, i32 inreg %offset.hi32) {
+; GFX9-LABEL: s_add_i64_variable_high_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_add_u32 s0, s0, 0
+; GFX9-NEXT: s_addc_u32 s1, s1, s2
+; GFX9-NEXT: ; return to shader part epilog
+ %zext.offset.hi32 = zext i32 %offset.hi32 to i64
+ %in.high.bits = shl i64 %zext.offset.hi32, 32
+ %add = add i64 %reg, %in.high.bits
+ ret i64 %add
+} diff --git a/llvm/test/CodeGen/AMDGPU/sub64-low-32-bits-known-zero.ll b/llvm/test/CodeGen/AMDGPU/sub64-low-32-bits-known-zero.ll new file mode 100644 index 0000000000000..779c4aef647b4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sub64-low-32-bits-known-zero.ll @@ -0,0 +1,213 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s + +; Reduce a 64-bit sub by a constant if we know the low 32-bits are all +; zero. + +; sub i64:x, K if computeTrailingZeros(K) >= 32 +; => build_pair (sub x.hi, K.hi), x.lo + +define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_0(i64 inreg %reg) { +; GFX9-LABEL: s_sub_i64_const_low_bits_known0_0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s0, 0 +; GFX9-NEXT: s_addc_u32 s1, s1, 0xfffc0000 +; GFX9-NEXT: ; return to shader part epilog + %sub = sub i64 %reg, 1125899906842624 ; (1 << 50) + ret i64 %sub +} + +define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_1(i64 inreg %reg) { +; GFX9-LABEL: s_sub_i64_const_low_bits_known0_1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s0, 0 +; GFX9-NEXT: s_addc_u32 s1, s1, -1 +; GFX9-NEXT: ; return to shader part epilog + %sub = sub i64 %reg, 4294967296 ; (1 << 32) + ret i64 %sub +} + +define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_2(i64 inreg %reg) { +; GFX9-LABEL: s_sub_i64_const_low_bits_known0_2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s0, 0 +; GFX9-NEXT: s_addc_u32 s1, s1, -2 +; GFX9-NEXT: ; return to shader part epilog + %sub = sub i64 %reg, 8589934592 ; (1 << 33) + ret i64 %sub +} + +define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_3(i64 inreg %reg) { +; GFX9-LABEL: s_sub_i64_const_low_bits_known0_3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s0, 0 +; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000 +; GFX9-NEXT: ; return to shader part epilog + %sub = sub i64 %reg, -9223372036854775808 ; (1 << 63) + ret i64 %sub +} + +define amdgpu_ps 
i64 @s_sub_i64_const_low_bits_known0_4(i64 inreg %reg) { +; GFX9-LABEL: s_sub_i64_const_low_bits_known0_4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s0, 0 +; GFX9-NEXT: s_addc_u32 s1, s1, 1 +; GFX9-NEXT: ; return to shader part epilog + %sub = sub i64 %reg, -4294967296 ; 0xffffffff00000000 + ret i64 %sub +} + +define i64 @v_sub_i64_const_low_bits_known0_0(i64 %reg) { +; GFX9-LABEL: v_sub_i64_const_low_bits_known0_0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v2, 0xfffc0000 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %sub = sub i64 %reg, 1125899906842624 ; (1 << 50) + ret i64 %sub +} + +define i64 @v_sub_i64_const_low_bits_known0_1(i64 %reg) { +; GFX9-LABEL: v_sub_i64_const_low_bits_known0_1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %sub = sub i64 %reg, 4294967296 ; (1 << 32) + ret i64 %sub +} + +define i64 @v_sub_i64_const_low_bits_known0_2(i64 %reg) { +; GFX9-LABEL: v_sub_i64_const_low_bits_known0_2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -2, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %sub = sub i64 %reg, 8589934592 ; (1 << 33) + ret i64 %sub +} + +define i64 @v_sub_i64_const_low_bits_known0_3(i64 %reg) { +; GFX9-LABEL: v_sub_i64_const_low_bits_known0_3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %sub = sub i64 %reg, -9223372036854775808 ; (1 << 63) + ret i64 %sub +} + +define i64 @v_sub_i64_const_low_bits_known0_4(i64 %reg) { +; 
GFX9-LABEL: v_sub_i64_const_low_bits_known0_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %sub = sub i64 %reg, -4294967296 ; 0xffffffff00000000
+ ret i64 %sub
+}
+
+define amdgpu_ps i64 @s_sub_i64_const_high_bits_known0_0(i64 inreg %reg) {
+; GFX9-LABEL: s_sub_i64_const_high_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_add_u32 s0, s0, 1
+; GFX9-NEXT: s_addc_u32 s1, s1, -1
+; GFX9-NEXT: ; return to shader part epilog
+ %sub = sub i64 %reg, 4294967295 ; (1 << 32) - 1
+ ret i64 %sub
+}
+
+define i64 @v_sub_i64_const_high_bits_known0_0(i64 %reg) {
+; GFX9-LABEL: v_sub_i64_const_high_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %sub = sub i64 %reg, 4294967295 ; (1 << 32) - 1
+ ret i64 %sub
+}
+
+define <2 x i64> @v_sub_v2i64_splat_const_low_bits_known0_0(<2 x i64> %reg) {
+; GFX9-LABEL: v_sub_v2i64_splat_const_low_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 0, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %sub = sub <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
+ ret <2 x i64> %sub
+}
+
+define <2 x i64> @v_sub_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> %reg) {
+; GFX9-LABEL: v_sub_v2i64_nonsplat_const_low_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 0, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, -2, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %sub = sub <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
+ ret <2 x i64> %sub
+}
+
+define amdgpu_ps <2 x i64> @s_sub_v2i64_splat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
+; GFX9-LABEL: s_sub_v2i64_splat_const_low_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_add_u32 s0, s0, 0
+; GFX9-NEXT: s_addc_u32 s1, s1, -1
+; GFX9-NEXT: s_add_u32 s2, s2, 0
+; GFX9-NEXT: s_addc_u32 s3, s3, -1
+; GFX9-NEXT: ; return to shader part epilog
+ %sub = sub <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
+ ret <2 x i64> %sub
+}
+
+define amdgpu_ps <2 x i64> @s_sub_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
+; GFX9-LABEL: s_sub_v2i64_nonsplat_const_low_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_add_u32 s0, s0, 0
+; GFX9-NEXT: s_addc_u32 s1, s1, -1
+; GFX9-NEXT: s_add_u32 s2, s2, 0
+; GFX9-NEXT: s_addc_u32 s3, s3, -2
+; GFX9-NEXT: ; return to shader part epilog
+ %sub = sub <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
+ ret <2 x i64> %sub
+}
+
+; We could reduce this to use a 32-bit sub if we use computeKnownBits
+define i64 @v_sub_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
+; GFX9-LABEL: v_sub_i64_variable_high_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %zext.offset.hi32 = zext i32 %offset.hi32 to i64
+ %in.high.bits = shl i64 %zext.offset.hi32, 32
+ %sub = sub i64 %reg, %in.high.bits
+ ret i64 %sub
+}
+
+; We could reduce this to use a 32-bit sub if we use computeKnownBits
+define amdgpu_ps i64 @s_sub_i64_variable_high_bits_known0_0(i64 inreg %reg, i32 inreg %offset.hi32) {
+; GFX9-LABEL: s_sub_i64_variable_high_bits_known0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_sub_u32 s0, s0, 0
+; GFX9-NEXT: s_subb_u32 s1, s1, s2
+; GFX9-NEXT: ; return to shader part epilog
+ %zext.offset.hi32 = zext i32 %offset.hi32 to i64
+ %in.high.bits = shl i64 %zext.offset.hi32, 32
+ %sub = sub i64 %reg,
%in.high.bits + ret i64 %sub +}