66; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
77; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
88
9- ; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-GISEL %s
10- ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
11- ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
12- ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
13- ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
9+ ; XUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-GISEL %s
10+ ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
11+ ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
12+ ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
13+ ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
1414
1515; define half @test_ldexp_f16_i16(ptr addrspace(1) %out, half %a, i16 %b) #0 {
1616; %result = call half @llvm.experimental.constrained.ldexp.f16.i16(half %a, i16 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -287,11 +287,12 @@ define <3 x half> @test_ldexp_v3f16_v3i32(ptr addrspace(1) %out, <3 x half> %a,
287287; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff
288288; GFX9-GISEL-NEXT: v_med3_i32 v4, v4, v0, v1
289289; GFX9-GISEL-NEXT: v_med3_i32 v5, v5, v0, v1
290+ ; GFX9-GISEL-NEXT: v_med3_i32 v0, v6, v0, v1
290291; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v4, v2, v4
291292; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
292- ; GFX9-GISEL-NEXT: v_med3_i32 v0, v6, v0, v1
293293; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v1, v3, v0
294294; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v4
295+ ; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, s4, 16, v1
295296; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
296297;
297298; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v3f16_v3i32:
@@ -312,18 +313,21 @@ define <3 x half> @test_ldexp_v3f16_v3i32(ptr addrspace(1) %out, <3 x half> %a,
312313; GFX11-GISEL-FAKE16: ; %bb.0:
313314; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314315; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7fff
315- ; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
316+ ; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
316317; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v1, 0xffff8000, v4, v0
317- ; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
318- ; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v5, 0xffff8000, v5, v0
319- ; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v2, v1
320- ; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
321- ; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v2, v4, v5
322318; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v4, 0xffff8000, v6, v0
319+ ; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
320+ ; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v0, 0xffff8000, v5, v0
321+ ; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v2, v1
322+ ; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
323+ ; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v2, v3, v4
324+ ; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v6, v0
325+ ; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
323326; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
324- ; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
325- ; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v2, 16, v1
326- ; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v3, v4
327+ ; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2
328+ ; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
329+ ; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
330+ ; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v1, s0, 16, v2
327331; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
328332 %result = call <3 x half > @llvm.experimental.constrained.ldexp.v3f16.v3i32 (<3 x half > %a , <3 x i32 > %b , metadata !"round.dynamic" , metadata !"fpexcept.strict" )
329333 ret <3 x half > %result
@@ -482,6 +486,74 @@ define <4 x half> @test_ldexp_v4f16_v4i32(ptr addrspace(1) %out, <4 x half> %a,
482486 ret <4 x half > %result
483487}
484488
489+ define amdgpu_ps half @s_test_ldexp_f16_i32 (half inreg %a , i32 inreg %b ) #0 { ; amdgpu_ps variant with both operands passed inreg; the checks below read %a from s0 and %b from s1
490+ ; GFX8-SDAG-LABEL: s_test_ldexp_f16_i32:
491+ ; GFX8-SDAG: ; %bb.0:
492+ ; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0xffff8000
493+ ; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, 0x7fff
494+ ; GFX8-SDAG-NEXT: v_med3_i32 v0, s1, v0, v1
495+ ; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v0, s0, v0
496+ ; GFX8-SDAG-NEXT: ; return to shader part epilog
497+ ;
498+ ; GFX9-SDAG-LABEL: s_test_ldexp_f16_i32:
499+ ; GFX9-SDAG: ; %bb.0:
500+ ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0xffff8000
501+ ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x7fff
502+ ; GFX9-SDAG-NEXT: v_med3_i32 v0, s1, v0, v1
503+ ; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, s0, v0
504+ ; GFX9-SDAG-NEXT: ; return to shader part epilog
505+ ;
506+ ; GFX11-SDAG-TRUE16-LABEL: s_test_ldexp_f16_i32:
507+ ; GFX11-SDAG-TRUE16: ; %bb.0:
508+ ; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0x7fff
509+ ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
510+ ; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v0, 0xffff8000, s1, v0
511+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, s0, v0.l
512+ ; GFX11-SDAG-TRUE16-NEXT: ; return to shader part epilog
513+ ;
514+ ; GFX11-SDAG-FAKE16-LABEL: s_test_ldexp_f16_i32:
515+ ; GFX11-SDAG-FAKE16: ; %bb.0:
516+ ; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7fff
517+ ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
518+ ; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v0, 0xffff8000, s1, v0
519+ ; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, s0, v0
520+ ; GFX11-SDAG-FAKE16-NEXT: ; return to shader part epilog
521+ ;
522+ ; GFX8-GISEL-LABEL: s_test_ldexp_f16_i32:
523+ ; GFX8-GISEL: ; %bb.0:
524+ ; GFX8-GISEL-NEXT: s_max_i32 s1, s1, 0xffff8000
525+ ; GFX8-GISEL-NEXT: s_min_i32 s1, s1, 0x7fff
526+ ; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s1
527+ ; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v0, s0, v0
528+ ; GFX8-GISEL-NEXT: ; return to shader part epilog
529+ ;
530+ ; GFX9-GISEL-LABEL: s_test_ldexp_f16_i32:
531+ ; GFX9-GISEL: ; %bb.0:
532+ ; GFX9-GISEL-NEXT: s_max_i32 s1, s1, 0xffff8000
533+ ; GFX9-GISEL-NEXT: s_min_i32 s1, s1, 0x7fff
534+ ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s1
535+ ; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, s0, v0
536+ ; GFX9-GISEL-NEXT: ; return to shader part epilog
537+ ;
538+ ; GFX11-GISEL-TRUE16-LABEL: s_test_ldexp_f16_i32:
539+ ; GFX11-GISEL-TRUE16: ; %bb.0:
540+ ; GFX11-GISEL-TRUE16-NEXT: s_max_i32 s1, s1, 0xffff8000
541+ ; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
542+ ; GFX11-GISEL-TRUE16-NEXT: s_min_i32 s1, s1, 0x7fff
543+ ; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e64 v0.l, s0, s1
544+ ; GFX11-GISEL-TRUE16-NEXT: ; return to shader part epilog
545+ ;
546+ ; GFX11-GISEL-FAKE16-LABEL: s_test_ldexp_f16_i32:
547+ ; GFX11-GISEL-FAKE16: ; %bb.0:
548+ ; GFX11-GISEL-FAKE16-NEXT: s_max_i32 s1, s1, 0xffff8000
549+ ; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
550+ ; GFX11-GISEL-FAKE16-NEXT: s_min_i32 s1, s1, 0x7fff
551+ ; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e64 v0, s0, s1
552+ ; GFX11-GISEL-FAKE16-NEXT: ; return to shader part epilog
553+ %result = call half @llvm.experimental.constrained.ldexp.f16.i32 (half %a , i32 %b , metadata !"round.dynamic" , metadata !"fpexcept.strict" ) ; every expected sequence above bounds s1 (%b) by 0xffff8000 and 0x7fff (med3 or max+min) before the f16 ldexp
554+ ret half %result
555+ }
556+
485557declare half @llvm.experimental.constrained.ldexp.f16.i16 (half , i16 , metadata , metadata ) #1
486558declare half @llvm.experimental.constrained.ldexp.f16.i32 (half , i32 , metadata , metadata ) #1
487559declare <2 x half > @llvm.experimental.constrained.ldexp.v2f16.v2i16 (<2 x half >, <2 x i16 >, metadata , metadata ) #1
0 commit comments