@@ -1459,8 +1459,8 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
1459
1459
; SI-NEXT: s_mov_b32 s3, 0xf000
1460
1460
; SI-NEXT: s_mov_b32 s2, -1
1461
1461
; SI-NEXT: s_waitcnt lgkmcnt(0)
1462
- ; SI-NEXT: s_mul_i32 s6 , s6, s7
1463
- ; SI-NEXT: s_and_b32 s4, s6 , 1
1462
+ ; SI-NEXT: s_and_b32 s4 , s6, s7
1463
+ ; SI-NEXT: s_and_b32 s4, s4 , 1
1464
1464
; SI-NEXT: v_mov_b32_e32 v0, s4
1465
1465
; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
1466
1466
; SI-NEXT: s_endpgm
@@ -1473,8 +1473,8 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
1473
1473
; VI-NEXT: s_mov_b32 s3, 0xf000
1474
1474
; VI-NEXT: s_mov_b32 s2, -1
1475
1475
; VI-NEXT: s_waitcnt lgkmcnt(0)
1476
- ; VI-NEXT: s_mul_i32 s6 , s6, s7
1477
- ; VI-NEXT: s_and_b32 s4, s6 , 1
1476
+ ; VI-NEXT: s_and_b32 s4 , s6, s7
1477
+ ; VI-NEXT: s_and_b32 s4, s4 , 1
1478
1478
; VI-NEXT: v_mov_b32_e32 v0, s4
1479
1479
; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
1480
1480
; VI-NEXT: s_endpgm
@@ -1487,8 +1487,8 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
1487
1487
; GFX9-NEXT: s_mov_b32 s3, 0xf000
1488
1488
; GFX9-NEXT: s_mov_b32 s2, -1
1489
1489
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1490
- ; GFX9-NEXT: s_mul_i32 s6 , s6, s7
1491
- ; GFX9-NEXT: s_and_b32 s4, s6 , 1
1490
+ ; GFX9-NEXT: s_and_b32 s4 , s6, s7
1491
+ ; GFX9-NEXT: s_and_b32 s4, s4 , 1
1492
1492
; GFX9-NEXT: v_mov_b32_e32 v0, s4
1493
1493
; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0
1494
1494
; GFX9-NEXT: s_endpgm
@@ -1500,7 +1500,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
1500
1500
; GFX10-NEXT: s_load_dword s3, s[4:5], 0x70
1501
1501
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1502
1502
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1503
- ; GFX10-NEXT: s_mul_i32 s2, s2, s3
1503
+ ; GFX10-NEXT: s_and_b32 s2, s2, s3
1504
1504
; GFX10-NEXT: s_mov_b32 s3, 0x31016000
1505
1505
; GFX10-NEXT: s_and_b32 s2, s2, 1
1506
1506
; GFX10-NEXT: v_mov_b32_e32 v0, s2
@@ -1515,7 +1515,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
1515
1515
; GFX11-NEXT: s_load_b32 s3, s[4:5], 0x70
1516
1516
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
1517
1517
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1518
- ; GFX11-NEXT: s_mul_i32 s2, s2, s3
1518
+ ; GFX11-NEXT: s_and_b32 s2, s2, s3
1519
1519
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
1520
1520
; GFX11-NEXT: s_and_b32 s2, s2, 1
1521
1521
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -1531,7 +1531,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
1531
1531
; GFX12-NEXT: s_load_b32 s3, s[4:5], 0x70
1532
1532
; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
1533
1533
; GFX12-NEXT: s_wait_kmcnt 0x0
1534
- ; GFX12-NEXT: s_mul_i32 s2, s2, s3
1534
+ ; GFX12-NEXT: s_and_b32 s2, s2, s3
1535
1535
; GFX12-NEXT: s_mov_b32 s3, 0x31016000
1536
1536
; GFX12-NEXT: s_and_b32 s2, s2, 1
1537
1537
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -1555,7 +1555,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
1555
1555
; EG-NEXT: MOV * T0.X, 0.0,
1556
1556
; EG-NEXT: ALU clause starting at 11:
1557
1557
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
1558
- ; EG-NEXT: MULLO_INT * T0.X , T1.X, T0.X,
1558
+ ; EG-NEXT: AND_INT * T1.W , T1.X, T0.X,
1559
1559
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
1560
1560
; EG-NEXT: AND_INT T1.W, PS, 1,
1561
1561
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
@@ -1589,7 +1589,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
1589
1589
; SI-NEXT: s_mov_b32 s4, s0
1590
1590
; SI-NEXT: s_mov_b32 s5, s1
1591
1591
; SI-NEXT: s_waitcnt vmcnt(0)
1592
- ; SI-NEXT: v_mul_lo_u32 v0, v0, v1
1592
+ ; SI-NEXT: v_and_b32_e32 v0, v0, v1
1593
1593
; SI-NEXT: v_and_b32_e32 v0, 1, v0
1594
1594
; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
1595
1595
; SI-NEXT: s_endpgm
@@ -1609,7 +1609,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
1609
1609
; VI-NEXT: s_mov_b32 s4, s0
1610
1610
; VI-NEXT: s_mov_b32 s5, s1
1611
1611
; VI-NEXT: s_waitcnt vmcnt(0)
1612
- ; VI-NEXT: v_mul_lo_u32 v0, v0, v1
1612
+ ; VI-NEXT: v_and_b32_e32 v0, v0, v1
1613
1613
; VI-NEXT: v_and_b32_e32 v0, 1, v0
1614
1614
; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
1615
1615
; VI-NEXT: s_endpgm
@@ -1629,7 +1629,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
1629
1629
; GFX9-NEXT: s_mov_b32 s4, s0
1630
1630
; GFX9-NEXT: s_mov_b32 s5, s1
1631
1631
; GFX9-NEXT: s_waitcnt vmcnt(0)
1632
- ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
1632
+ ; GFX9-NEXT: v_and_b32_e32 v0, v0, v1
1633
1633
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
1634
1634
; GFX9-NEXT: buffer_store_byte v0, off, s[4:7], 0
1635
1635
; GFX9-NEXT: s_endpgm
@@ -1650,7 +1650,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
1650
1650
; GFX10-NEXT: s_mov_b32 s4, s0
1651
1651
; GFX10-NEXT: s_mov_b32 s5, s1
1652
1652
; GFX10-NEXT: s_waitcnt vmcnt(0)
1653
- ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
1653
+ ; GFX10-NEXT: v_and_b32_e32 v0, v0, v1
1654
1654
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
1655
1655
; GFX10-NEXT: buffer_store_byte v0, off, s[4:7], 0
1656
1656
; GFX10-NEXT: s_endpgm
@@ -1671,7 +1671,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
1671
1671
; GFX11-NEXT: s_mov_b32 s4, s0
1672
1672
; GFX11-NEXT: s_mov_b32 s5, s1
1673
1673
; GFX11-NEXT: s_waitcnt vmcnt(0)
1674
- ; GFX11-NEXT: v_mul_lo_u32 v0, v0, v1
1674
+ ; GFX11-NEXT: v_and_b32_e32 v0, v0, v1
1675
1675
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1676
1676
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
1677
1677
; GFX11-NEXT: buffer_store_b8 v0, off, s[4:7], 0
@@ -1693,7 +1693,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
1693
1693
; GFX12-NEXT: s_mov_b32 s4, s0
1694
1694
; GFX12-NEXT: s_mov_b32 s5, s1
1695
1695
; GFX12-NEXT: s_wait_loadcnt 0x0
1696
- ; GFX12-NEXT: v_mul_lo_u32 v0, v0, v1
1696
+ ; GFX12-NEXT: v_and_b32_e32 v0, v0, v1
1697
1697
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
1698
1698
; GFX12-NEXT: v_and_b32_e32 v0, 1, v0
1699
1699
; GFX12-NEXT: buffer_store_b8 v0, off, s[4:7], null
@@ -1714,7 +1714,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
1714
1714
; EG-NEXT: MOV * T0.X, KC0[2].Z,
1715
1715
; EG-NEXT: ALU clause starting at 11:
1716
1716
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
1717
- ; EG-NEXT: MULLO_INT * T0.X , T0.X, T1.X,
1717
+ ; EG-NEXT: AND_INT * T1.W , T0.X, T1.X,
1718
1718
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
1719
1719
; EG-NEXT: AND_INT T1.W, PS, 1,
1720
1720
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
0 commit comments