Skip to content
73 changes: 73 additions & 0 deletions llvm/test/CodeGen/AMDGPU/uniform-vgpr-to-sgpr-return.ll
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,76 @@ define amdgpu_ps ptr @uniform_v_to_s_ptr(ptr inreg %x) {
%ptr = inttoptr i32 %int to ptr
ret ptr %ptr
}

; Scalar f16 case: llvm.maximum of two inreg half operands, returned as half
; from an amdgpu_ps function. The expected code leaves the result in v0 and
; contains no v_readfirstlane_b32.
define amdgpu_ps half @uniform_v_to_s_f16(half inreg %a, half inreg %b) {
; GFX11-LABEL: uniform_v_to_s_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_max_f16_e64 v0, s0, s1
; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
; GFX11-NEXT: ; return to shader part epilog
  %result = call half @llvm.maximum.f16(half %a, half %b)
  ret half %result
}

; Packed f16 case: llvm.maximum of two inreg <2 x half> operands, with the
; 32-bit result bitcast to float for the return. The expected code leaves the
; result in v0 and contains no v_readfirstlane_b32.
;
; The intrinsic is overloaded on its operand type, so the <2 x half> form is
; spelled with the .v2f16 suffix; the .f16 suffix names the scalar half form.
define amdgpu_ps float @uniform_v_to_s_v2f16(<2 x half> inreg %a, <2 x half> inreg %b) {
; GFX11-LABEL: uniform_v_to_s_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_pk_max_f16 v0, s0, s1
; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
; GFX11-NEXT: s_lshr_b32 s2, s1, 16
; GFX11-NEXT: s_lshr_b32 s0, s0, 16
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-NEXT: ; return to shader part epilog
  %max = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
  %cast = bitcast <2 x half> %max to float
  ret float %cast
}

; Integer case: bitwise AND of two inreg i32 operands, bitcast to float for the
; return. The expected code computes the AND with s_and_b32 and then copies the
; scalar result into v0 with v_mov_b32.
define amdgpu_ps float @uniform_v_s_float(i32 inreg %a, i32 inreg %b) {
; GFX11-LABEL: uniform_v_s_float:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s0, s1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: ; return to shader part epilog
  %masked = and i32 %a, %b
  %as.f32 = bitcast i32 %masked to float
  ret float %as.f32
}

; f64 case: llvm.maximum of two inreg double operands, returned as double from
; an amdgpu_ps function. The expected code copies v1/v0 into s1/s0 with
; v_readfirstlane_b32, i.e. the double result is handed back in SGPRs.
; NOTE(review): reviewers of this test flagged the SGPR return for double as a
; bug in return-register selection; these checks record the current output and
; should be regenerated if that selection changes.
define amdgpu_ps double @uniform_v_to_s_double(double inreg %a, double inreg %b) {
; GFX11-LABEL: uniform_v_to_s_double:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_max_f64 v[0:1], s[0:1], s[2:3]
; GFX11-NEXT: v_cmp_u_f64_e64 s0, s[0:1], s[2:3]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x7ff80000, s0
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, 0, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_readfirstlane_b32 s1, v1
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
Comment on lines +85 to +86
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like a bug that double is using SGPR returns

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't that expected to use an SGPR return?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's not an integer. Really we should redo this system to use inreg on the return value

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then this needs to be fixed. Should I create a ticket for fixing this separately?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, or just do it. Note the case here would be i64.

Copy link
Contributor Author

@PankajDwivedi-25 PankajDwivedi-25 Apr 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't get this: what is really required here to fix it?

I can see it will only insert readfirstlane if the dest reg is SGPR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but double should not be treated as a type returned in SGPRs

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At this point the return argument registers are already finalized, right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The bug is in the selection of the registers

; GFX11-NEXT: ; return to shader part epilog
%max0 = call double @llvm.maximum.f64(double %a, double %b)
ret double %max0
}

; Scalar f32 case: llvm.maximum of two inreg float operands, returned as float
; from an amdgpu_ps function. The expected code leaves the result in v0 and
; contains no v_readfirstlane_b32.
define amdgpu_ps float @uniform_v_to_s_f32(float inreg %a, float inreg %b) {
; GFX11-LABEL: uniform_v_to_s_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_max_f32_e64 v0, s0, s1
; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
; GFX11-NEXT: ; return to shader part epilog
  %result = call float @llvm.maximum.f32(float %a, float %b)
  ret float %result
}
Loading