@@ -813,4 +813,102 @@ bb7:
813813 ret void
814814
815815}
816+
; test_umul_i24 -- codegen check for an unsigned 64-bit multiply in which one
; operand is a zero-extended i32 that fits in 23 bits (%arg shifted right by 9)
; and the other is the constant 4286595041 (0xff803fe1; the R600 check lines
; below print the same bit pattern as the signed literal -8372255).
; For every target the expected output forms the low and high 32-bit halves of
; the product (a mul_hi / mul_lo pair, or a single v_mad_u64_u32 on VI) and then
; merges them with a 1-bit alignbit / BIT_ALIGN_INT.
; NOTE(review): the name suggests this guards against selecting a 24-bit
; multiply when the constant operand does not fit in 24 bits -- confirm against
; the commit that introduced the test.
; NOTE(review): the check lines look like update_llc_test_checks.py output; if
; so, regenerate them instead of editing by hand -- confirm from the file header.
; %out is never read or written; the result is stored through a null
; addrspace(1) pointer.
817+ define amdgpu_kernel void @test_umul_i24 (ptr addrspace (1 ) %out , i32 %arg ) {
818+ ; SI-LABEL: test_umul_i24:
819+ ; SI: ; %bb.0:
820+ ; SI-NEXT: s_load_dword s1, s[2:3], 0xb
821+ ; SI-NEXT: v_mov_b32_e32 v0, 0xff803fe1
822+ ; SI-NEXT: s_mov_b32 s0, 0
823+ ; SI-NEXT: s_mov_b32 s3, 0xf000
824+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
825+ ; SI-NEXT: s_lshr_b32 s1, s1, 9
826+ ; SI-NEXT: v_mul_hi_u32 v0, s1, v0
827+ ; SI-NEXT: s_mul_i32 s1, s1, 0xff803fe1
828+ ; SI-NEXT: v_alignbit_b32 v0, v0, s1, 1
829+ ; SI-NEXT: s_mov_b32 s2, -1
830+ ; SI-NEXT: s_mov_b32 s1, s0
831+ ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
832+ ; SI-NEXT: s_endpgm
833+ ;
834+ ; VI-LABEL: test_umul_i24:
835+ ; VI: ; %bb.0:
836+ ; VI-NEXT: s_load_dword s0, s[2:3], 0x2c
837+ ; VI-NEXT: v_mov_b32_e32 v0, 0xff803fe1
838+ ; VI-NEXT: s_mov_b32 s3, 0xf000
839+ ; VI-NEXT: s_mov_b32 s2, -1
840+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
841+ ; VI-NEXT: s_lshr_b32 s0, s0, 9
842+ ; VI-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s0, v0, 0
843+ ; VI-NEXT: s_mov_b32 s0, 0
844+ ; VI-NEXT: s_mov_b32 s1, s0
845+ ; VI-NEXT: v_alignbit_b32 v0, v1, v0, 1
846+ ; VI-NEXT: s_nop 1
847+ ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
848+ ; VI-NEXT: s_endpgm
849+ ;
850+ ; GFX9-LABEL: test_umul_i24:
851+ ; GFX9: ; %bb.0:
852+ ; GFX9-NEXT: s_load_dword s1, s[2:3], 0x2c
853+ ; GFX9-NEXT: s_mov_b32 s0, 0
854+ ; GFX9-NEXT: s_mov_b32 s3, 0xf000
855+ ; GFX9-NEXT: s_mov_b32 s2, -1
856+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
857+ ; GFX9-NEXT: s_lshr_b32 s1, s1, 9
858+ ; GFX9-NEXT: s_mul_hi_u32 s4, s1, 0xff803fe1
859+ ; GFX9-NEXT: s_mul_i32 s1, s1, 0xff803fe1
860+ ; GFX9-NEXT: v_mov_b32_e32 v0, s1
861+ ; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, 1
862+ ; GFX9-NEXT: s_mov_b32 s1, s0
863+ ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
864+ ; GFX9-NEXT: s_endpgm
865+ ;
866+ ; EG-LABEL: test_umul_i24:
867+ ; EG: ; %bb.0:
868+ ; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[]
869+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
870+ ; EG-NEXT: CF_END
871+ ; EG-NEXT: PAD
872+ ; EG-NEXT: ALU clause starting at 4:
873+ ; EG-NEXT: LSHR * T0.W, KC0[2].Z, literal.x,
874+ ; EG-NEXT: 9(1.261169e-44), 0(0.000000e+00)
875+ ; EG-NEXT: MULHI * T0.X, PV.W, literal.x,
876+ ; EG-NEXT: -8372255(nan), 0(0.000000e+00)
877+ ; EG-NEXT: MULLO_INT * T0.Y, T0.W, literal.x,
878+ ; EG-NEXT: -8372255(nan), 0(0.000000e+00)
879+ ; EG-NEXT: BIT_ALIGN_INT T0.X, T0.X, PS, 1,
880+ ; EG-NEXT: MOV * T1.X, literal.x,
881+ ; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
882+ ;
883+ ; CM-LABEL: test_umul_i24:
884+ ; CM: ; %bb.0:
885+ ; CM-NEXT: ALU 14, @4, KC0[CB0:0-32], KC1[]
886+ ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
887+ ; CM-NEXT: CF_END
888+ ; CM-NEXT: PAD
889+ ; CM-NEXT: ALU clause starting at 4:
890+ ; CM-NEXT: LSHR * T0.W, KC0[2].Z, literal.x,
891+ ; CM-NEXT: 9(1.261169e-44), 0(0.000000e+00)
892+ ; CM-NEXT: MULHI T0.X, T0.W, literal.x,
893+ ; CM-NEXT: MULHI T0.Y (MASKED), T0.W, literal.x,
894+ ; CM-NEXT: MULHI T0.Z (MASKED), T0.W, literal.x,
895+ ; CM-NEXT: MULHI * T0.W (MASKED), T0.W, literal.x,
896+ ; CM-NEXT: -8372255(nan), 0(0.000000e+00)
897+ ; CM-NEXT: MULLO_INT T0.X (MASKED), T0.W, literal.x,
898+ ; CM-NEXT: MULLO_INT T0.Y, T0.W, literal.x,
899+ ; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, literal.x,
900+ ; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, literal.x,
901+ ; CM-NEXT: -8372255(nan), 0(0.000000e+00)
902+ ; CM-NEXT: BIT_ALIGN_INT * T0.X, T0.X, PV.Y, 1,
903+ ; CM-NEXT: MOV * T1.X, literal.x,
904+ ; CM-NEXT: 0(0.000000e+00), 0(0.000000e+00)
; Logical shift right by 9 leaves a value below 2^23 in %i.
905+ %i = lshr i32 %arg , 9
; Widen without sign so the multiply below is an unsigned 32x32 -> 64 product.
906+ %i1 = zext i32 %i to i64
; 4286595041 == 0xff803fe1, the literal that appears in the check lines above.
907+ %i2 = mul i64 %i1 , 4286595041
; Shift by 1 and truncate: the stored value is bits 1..32 of the 64-bit product,
; which is why the expected code needs both the high and the low half.
908+ %i3 = lshr i64 %i2 , 1
909+ %i4 = trunc i64 %i3 to i32
; The destination is a null global (addrspace 1) pointer, not %out.
910+ store i32 %i4 , ptr addrspace (1 ) null , align 4
911+ ret void
912+ }
913+
; Attribute group #0 carries only nounwind. @test_umul_i24 above is defined
; without a reference to any attribute group.
816914attributes #0 = { nounwind }
0 commit comments