-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AMDGPU] Insert readfirstlane in the function returns in sgpr. #135326
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
627f6d8
56b0571
eea0c50
d9ab3b9
ec5c6be
0943e98
c720140
b1575e5
4d4f883
1039a6b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -72,3 +72,76 @@ define amdgpu_ps ptr @uniform_v_to_s_ptr(ptr inreg %x) { | |
| %ptr = inttoptr i32 %int to ptr | ||
| ret ptr %ptr | ||
| } | ||
|
|
||
| define amdgpu_ps half @uniform_v_to_s_f16(half inreg %a, half inreg %b) { | ||
| ; GFX11-LABEL: uniform_v_to_s_f16: | ||
| ; GFX11: ; %bb.0: | ||
| ; GFX11-NEXT: v_max_f16_e64 v0, s0, s1 | ||
| ; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1 | ||
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | ||
| ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo | ||
| ; GFX11-NEXT: ; return to shader part epilog | ||
| %max = call half @llvm.maximum.f16(half %a, half %b) | ||
| ret half %max | ||
| } | ||
PankajDwivedi-25 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| define amdgpu_ps float @uniform_v_to_s_v2f16(<2 x half> inreg %a, <2 x half> inreg %b) { | ||
| ; GFX11-LABEL: uniform_v_to_s_v2f16: | ||
| ; GFX11: ; %bb.0: | ||
| ; GFX11-NEXT: v_pk_max_f16 v0, s0, s1 | ||
| ; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1 | ||
| ; GFX11-NEXT: s_lshr_b32 s2, s1, 16 | ||
| ; GFX11-NEXT: s_lshr_b32 s0, s0, 16 | ||
| ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 | ||
| ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo | ||
| ; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s2 | ||
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) | ||
| ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 | ||
| ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo | ||
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | ||
| ; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0 | ||
| ; GFX11-NEXT: ; return to shader part epilog | ||
| %max = call <2 x half> @llvm.maximum.f16(<2 x half> %a, <2 x half> %b) | ||
PankajDwivedi-25 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| %cast = bitcast <2 x half> %max to float | ||
| ret float %cast | ||
| } | ||
|
|
||
| define amdgpu_ps float @uniform_v_s_float(i32 inreg %a, i32 inreg %b) { | ||
| ; GFX11-LABEL: uniform_v_s_float: | ||
| ; GFX11: ; %bb.0: | ||
| ; GFX11-NEXT: s_and_b32 s0, s0, s1 | ||
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | ||
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 | ||
| ; GFX11-NEXT: ; return to shader part epilog | ||
| %and = and i32 %a, %b | ||
| %cast = bitcast i32 %and to float | ||
| ret float %cast | ||
| } | ||
|
|
||
| define amdgpu_ps double @uniform_v_to_s_double(double inreg %a, double inreg %b) { | ||
| ; GFX11-LABEL: uniform_v_to_s_double: | ||
| ; GFX11: ; %bb.0: | ||
| ; GFX11-NEXT: v_max_f64 v[0:1], s[0:1], s[2:3] | ||
| ; GFX11-NEXT: v_cmp_u_f64_e64 s0, s[0:1], s[2:3] | ||
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) | ||
| ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x7ff80000, s0 | ||
| ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, 0, s0 | ||
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) | ||
| ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 | ||
| ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 | ||
|
Comment on lines
+85
to
+86
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seems like a bug that double is using SGPR returns.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't that expected to use an SGPR return?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, it's not an integer. Really, we should redo this system to use inreg on the return value.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Then this needs to be fixed. Should I create a ticket for fixing this separately?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, or just do it. The case here would be i64.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't get this; what is really required here to fix this? I can see it will only insert readfirstlane if the destination register is an SGPR.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, but double should not be treated as a type returned in SGPRs.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. At this point the return argument registers are already finalized, right?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The bug is in the selection of the registers. |
||
| ; GFX11-NEXT: ; return to shader part epilog | ||
| %max0 = call double @llvm.maximum.f64(double %a, double %b) | ||
| ret double %max0 | ||
| } | ||
|
|
||
| define amdgpu_ps float @uniform_v_to_s_f32(float inreg %a, float inreg %b) { | ||
| ; GFX11-LABEL: uniform_v_to_s_f32: | ||
| ; GFX11: ; %bb.0: | ||
| ; GFX11-NEXT: v_max_f32_e64 v0, s0, s1 | ||
| ; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s1 | ||
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | ||
| ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo | ||
| ; GFX11-NEXT: ; return to shader part epilog | ||
| %max0 = call float @llvm.maximum.f32(float %a, float %b) | ||
| ret float %max0 | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.