@@ -463,39 +463,41 @@ define amdgpu_kernel void @test_smul24_i64_square(ptr addrspace(1) %out, i32 %a,
463463define amdgpu_kernel void @test_smul24_i33 (ptr addrspace (1 ) %out , i33 %a , i33 %b ) #0 {
464464; SI-LABEL: test_smul24_i33:
465465; SI: ; %bb.0: ; %entry
466- ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
467- ; SI-NEXT: s_load_dword s6, s[4:5], 0xd
468- ; SI-NEXT: s_load_dword s4, s[4:5], 0xb
469- ; SI-NEXT: s_mov_b32 s3, 0xf000
470- ; SI-NEXT: s_mov_b32 s2, -1
466+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
467+ ; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
468+ ; SI-NEXT: s_mov_b32 s7, 0xf000
469+ ; SI-NEXT: s_mov_b32 s6, -1
471470; SI-NEXT: s_waitcnt lgkmcnt(0)
472- ; SI-NEXT: s_bfe_i32 s5, s6, 0x180000
473- ; SI-NEXT: s_bfe_i32 s4, s4, 0x180000
474- ; SI-NEXT: v_mov_b32_e32 v0, s5
475- ; SI-NEXT: s_mul_i32 s5, s4, s5
476- ; SI-NEXT: v_mul_hi_i32_i24_e32 v1, s4, v0
477- ; SI-NEXT: v_mov_b32_e32 v0, s5
471+ ; SI-NEXT: s_mov_b32 s4, s0
472+ ; SI-NEXT: s_mov_b32 s5, s1
473+ ; SI-NEXT: s_bfe_i32 s0, s8, 0x180000
474+ ; SI-NEXT: s_bfe_i32 s1, s2, 0x180000
475+ ; SI-NEXT: v_mov_b32_e32 v0, s0
476+ ; SI-NEXT: s_mul_i32 s0, s1, s0
477+ ; SI-NEXT: v_mul_hi_i32_i24_e32 v1, s1, v0
478+ ; SI-NEXT: v_mov_b32_e32 v0, s0
478479; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
479480; SI-NEXT: v_ashr_i64 v[0:1], v[0:1], 31
480- ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3 ], 0
481+ ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7 ], 0
481482; SI-NEXT: s_endpgm
482483;
483484; VI-LABEL: test_smul24_i33:
484485; VI: ; %bb.0: ; %entry
485- ; VI-NEXT: s_load_dword s2, s[4:5], 0x2c
486- ; VI-NEXT: s_load_dword s3, s[4:5], 0x34
487- ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
486+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
487+ ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
488+ ; VI-NEXT: s_mov_b32 s7, 0xf000
489+ ; VI-NEXT: s_mov_b32 s6, -1
488490; VI-NEXT: s_waitcnt lgkmcnt(0)
489491; VI-NEXT: s_bfe_i32 s2, s2, 0x180000
490- ; VI-NEXT: s_bfe_i32 s3, s3 , 0x180000
492+ ; VI-NEXT: s_bfe_i32 s3, s4 , 0x180000
491493; VI-NEXT: v_mov_b32_e32 v0, s3
492494; VI-NEXT: v_mul_hi_i32_i24_e32 v1, s2, v0
493495; VI-NEXT: v_mul_i32_i24_e32 v0, s2, v0
494496; VI-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
495- ; VI-NEXT: s_mov_b32 s3, 0xf000
497+ ; VI-NEXT: s_mov_b32 s4, s0
496498; VI-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
497- ; VI-NEXT: s_mov_b32 s2, -1
498- ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3 ], 0
499+ ; VI-NEXT: s_mov_b32 s5, s1
500+ ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7 ], 0
499501; VI-NEXT: s_endpgm
500502;
501503; GFX9-LABEL: test_smul24_i33:
@@ -574,30 +576,32 @@ entry:
574576define amdgpu_kernel void @test_smulhi24_i33 (ptr addrspace (1 ) %out , i33 %a , i33 %b ) {
575577; SI-LABEL: test_smulhi24_i33:
576578; SI: ; %bb.0: ; %entry
577- ; SI-NEXT: s_load_dword s6, s[4:5], 0xd
578- ; SI-NEXT: s_load_dword s7, s[4:5], 0xb
579- ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
580- ; SI-NEXT: s_mov_b32 s3, 0xf000
581- ; SI-NEXT: s_mov_b32 s2, -1
579+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
580+ ; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
581+ ; SI-NEXT: s_mov_b32 s7, 0xf000
582+ ; SI-NEXT: s_mov_b32 s6, -1
582583; SI-NEXT: s_waitcnt lgkmcnt(0)
583- ; SI-NEXT: v_mov_b32_e32 v0, s6
584- ; SI-NEXT: v_mul_hi_i32_i24_e32 v0, s7, v0
584+ ; SI-NEXT: s_mov_b32 s4, s0
585+ ; SI-NEXT: s_mov_b32 s5, s1
586+ ; SI-NEXT: v_mov_b32_e32 v0, s8
587+ ; SI-NEXT: v_mul_hi_i32_i24_e32 v0, s2, v0
585588; SI-NEXT: v_and_b32_e32 v0, 1, v0
586- ; SI-NEXT: buffer_store_dword v0, off, s[0:3 ], 0
589+ ; SI-NEXT: buffer_store_dword v0, off, s[4:7 ], 0
587590; SI-NEXT: s_endpgm
588591;
589592; VI-LABEL: test_smulhi24_i33:
590593; VI: ; %bb.0: ; %entry
591- ; VI-NEXT: s_load_dword s6, s[4:5], 0x34
592- ; VI-NEXT: s_load_dword s7, s[4:5], 0x2c
593- ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
594- ; VI-NEXT: s_mov_b32 s3, 0xf000
595- ; VI-NEXT: s_mov_b32 s2, -1
594+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
595+ ; VI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34
596+ ; VI-NEXT: s_mov_b32 s7, 0xf000
597+ ; VI-NEXT: s_mov_b32 s6, -1
596598; VI-NEXT: s_waitcnt lgkmcnt(0)
597- ; VI-NEXT: v_mov_b32_e32 v0, s6
598- ; VI-NEXT: v_mul_hi_i32_i24_e32 v0, s7, v0
599+ ; VI-NEXT: s_mov_b32 s4, s0
600+ ; VI-NEXT: v_mov_b32_e32 v0, s8
601+ ; VI-NEXT: v_mul_hi_i32_i24_e32 v0, s2, v0
602+ ; VI-NEXT: s_mov_b32 s5, s1
599603; VI-NEXT: v_and_b32_e32 v0, 1, v0
600- ; VI-NEXT: buffer_store_dword v0, off, s[0:3 ], 0
604+ ; VI-NEXT: buffer_store_dword v0, off, s[4:7 ], 0
601605; VI-NEXT: s_endpgm
602606;
603607; GFX9-LABEL: test_smulhi24_i33:
0 commit comments