@@ -513,13 +513,15 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out,
513513define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1 (ptr addrspace (1 ) %out , float %a , float %b , float %c , [8 x i32 ], i1 %d ) {
514514; GFX7-LABEL: test_div_fmas_f32_inline_imm_1:
515515; GFX7: ; %bb.0:
516- ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
517- ; GFX7-NEXT: s_load_dword s4, s[4:5], 0xd
516+ ; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2
517+ ; GFX7-NEXT: s_load_dword s3, s[4:5], 0x4
518+ ; GFX7-NEXT: s_load_dword s6, s[4:5], 0xd
519+ ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
518520; GFX7-NEXT: s_waitcnt lgkmcnt(0)
519- ; GFX7-NEXT: v_mov_b32_e32 v1, s3
520- ; GFX7-NEXT: s_and_b32 s3, 1, s4
521521; GFX7-NEXT: v_mov_b32_e32 v0, s2
522- ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s3
522+ ; GFX7-NEXT: v_mov_b32_e32 v1, s3
523+ ; GFX7-NEXT: s_and_b32 s2, 1, s6
524+ ; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
523525; GFX7-NEXT: s_mov_b32 s2, -1
524526; GFX7-NEXT: s_mov_b32 s3, 0xf000
525527; GFX7-NEXT: s_nop 1
@@ -529,73 +531,84 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out,
529531;
530532; GFX8-LABEL: test_div_fmas_f32_inline_imm_1:
531533; GFX8: ; %bb.0:
532- ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
533- ; GFX8-NEXT: s_load_dword s4, s[4:5], 0x34
534+ ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x8
535+ ; GFX8-NEXT: s_load_dword s1, s[4:5], 0x10
536+ ; GFX8-NEXT: s_load_dword s2, s[4:5], 0x34
534537; GFX8-NEXT: s_waitcnt lgkmcnt(0)
535- ; GFX8-NEXT: v_mov_b32_e32 v0, s2
536- ; GFX8-NEXT: s_and_b32 s2, 1, s4
537- ; GFX8-NEXT: v_mov_b32_e32 v1, s3
538- ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
539- ; GFX8-NEXT: s_nop 3
538+ ; GFX8-NEXT: v_mov_b32_e32 v0, s0
539+ ; GFX8-NEXT: v_mov_b32_e32 v1, s1
540+ ; GFX8-NEXT: s_and_b32 s0, 1, s2
541+ ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
542+ ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
543+ ; GFX8-NEXT: s_nop 2
540544; GFX8-NEXT: v_div_fmas_f32 v2, v0, 1.0, v1
545+ ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
541546; GFX8-NEXT: v_mov_b32_e32 v0, s0
542547; GFX8-NEXT: v_mov_b32_e32 v1, s1
543548; GFX8-NEXT: flat_store_dword v[0:1], v2
544549; GFX8-NEXT: s_endpgm
545550;
546551; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_1:
547552; GFX10_W32: ; %bb.0:
548- ; GFX10_W32-NEXT: s_clause 0x1
549- ; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x34
550- ; GFX10_W32-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
553+ ; GFX10_W32-NEXT: s_clause 0x3
554+ ; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x34
555+ ; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x10
556+ ; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x8
557+ ; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
551558; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
552559; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
553- ; GFX10_W32-NEXT: s_and_b32 s4 , 1, s6
560+ ; GFX10_W32-NEXT: s_and_b32 s2 , 1, s2
554561; GFX10_W32-NEXT: v_mov_b32_e32 v0, s3
555- ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
556- ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s2 , 1.0, v0
562+ ; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
563+ ; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6 , 1.0, v0
557564; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1]
558565; GFX10_W32-NEXT: s_endpgm
559566;
560567; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_1:
561568; GFX10_W64: ; %bb.0:
562- ; GFX10_W64-NEXT: s_clause 0x1
563- ; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x34
564- ; GFX10_W64-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
569+ ; GFX10_W64-NEXT: s_clause 0x3
570+ ; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x34
571+ ; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x10
572+ ; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x8
573+ ; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
565574; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
566575; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
567- ; GFX10_W64-NEXT: s_and_b32 s4 , 1, s6
576+ ; GFX10_W64-NEXT: s_and_b32 s2 , 1, s2
568577; GFX10_W64-NEXT: v_mov_b32_e32 v0, s3
569- ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
570- ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s2 , 1.0, v0
578+ ; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
579+ ; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6 , 1.0, v0
571580; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1]
572581; GFX10_W64-NEXT: s_endpgm
573582;
574583; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_1:
575584; GFX11_W32: ; %bb.0:
576- ; GFX11_W32-NEXT: s_clause 0x1
577- ; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x34
578- ; GFX11_W32-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
585+ ; GFX11_W32-NEXT: s_clause 0x3
586+ ; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x34
587+ ; GFX11_W32-NEXT: s_load_b32 s3, s[4:5], 0x10
588+ ; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x8
589+ ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
579590; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
580591; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
581- ; GFX11_W32-NEXT: s_and_b32 s4 , 1, s6
592+ ; GFX11_W32-NEXT: s_and_b32 s2 , 1, s2
582593; GFX11_W32-NEXT: v_mov_b32_e32 v0, s3
583- ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
584- ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s2 , 1.0, v0
594+ ; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
595+ ; GFX11_W32-NEXT: v_div_fmas_f32 v0, s6 , 1.0, v0
585596; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
586597; GFX11_W32-NEXT: s_endpgm
587598;
588599; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_1:
589600; GFX11_W64: ; %bb.0:
590- ; GFX11_W64-NEXT: s_clause 0x1
591- ; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x34
592- ; GFX11_W64-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
601+ ; GFX11_W64-NEXT: s_clause 0x3
602+ ; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x34
603+ ; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x10
604+ ; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x8
605+ ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
593606; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
594607; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
595- ; GFX11_W64-NEXT: s_and_b32 s4 , 1, s6
608+ ; GFX11_W64-NEXT: s_and_b32 s2 , 1, s2
596609; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3
597- ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
598- ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s2 , 1.0, v0
610+ ; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
611+ ; GFX11_W64-NEXT: v_div_fmas_f32 v0, s6 , 1.0, v0
599612; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
600613; GFX11_W64-NEXT: s_endpgm
601614 %result = call float @llvm.amdgcn.div.fmas.f32 (float %a , float 1 .0 , float %c , i1 %d )
0 commit comments