@@ -164,6 +164,10 @@ class SPIRVInstructionSelector : public InstructionSelector {
164164 bool selectIntegerDot (Register ResVReg, const SPIRVType *ResType,
165165 MachineInstr &I) const ;
166166
// Selects the packed 4 x 8-bit dot-product-accumulate intrinsic by expanding
// it into per-lane bit-field extracts, multiplies and adds. `Signed` chooses
// between the signed (OpBitFieldSExtract) and unsigned (OpBitFieldUExtract)
// extraction opcode. The result is defined in ResVReg with type ResType.
167+ template <bool Signed>
168+ bool selectDot4AddPacked (Register ResVReg, const SPIRVType *ResType,
169+ MachineInstr &I) const ;
170+
167171 void renderImm32 (MachineInstrBuilder &MIB, const MachineInstr &I,
168172 int OpIdx) const ;
169173 void renderFImm64 (MachineInstrBuilder &MIB, const MachineInstr &I,
@@ -1694,6 +1698,84 @@ bool SPIRVInstructionSelector::selectIntegerDot(Register ResVReg,
16941698 return Result;
16951699}
16961700
1701+ // Since pre-1.6 SPIRV has no DotProductInput4x8BitPacked implementation,
1702+ // extract the elements of the packed inputs, multiply them and add the result
1703+ // to the accumulator.
1704+ template <bool Signed>
1705+ bool SPIRVInstructionSelector::selectDot4AddPacked (Register ResVReg,
1706+ const SPIRVType *ResType,
1707+ MachineInstr &I) const {
1708+ assert (I.getNumOperands () == 5 );
1709+ assert (I.getOperand (2 ).isReg ());
1710+ assert (I.getOperand (3 ).isReg ());
1711+ assert (I.getOperand (4 ).isReg ());
1712+ MachineBasicBlock &BB = *I.getParent ();
1713+
1714+ bool Result = false ;
1715+
1716+ // Acc = C
1717+ Register Acc = I.getOperand (4 ).getReg ();
1718+ SPIRVType *EltType = GR.getOrCreateSPIRVIntegerType (8 , I, TII);
1719+ auto ExtractOp =
1720+ Signed ? SPIRV::OpBitFieldSExtract : SPIRV::OpBitFieldUExtract;
1721+
1722+ // Extract the i8 element, multiply and add it to the accumulator
1723+ for (unsigned i = 0 ; i < 4 ; i++) {
1724+ // A[i]
1725+ Register AElt = MRI->createVirtualRegister (&SPIRV::IDRegClass);
1726+ Result |= BuildMI (BB, I, I.getDebugLoc (), TII.get (ExtractOp))
1727+ .addDef (AElt)
1728+ .addUse (GR.getSPIRVTypeID (ResType))
1729+ .addUse (I.getOperand (2 ).getReg ())
1730+ .addUse (GR.getOrCreateConstInt (i * 8 , I, EltType, TII))
1731+ .addImm (8 )
1732+ .constrainAllUses (TII, TRI, RBI);
1733+
1734+ // B[i]
1735+ Register BElt = MRI->createVirtualRegister (&SPIRV::IDRegClass);
1736+ Result |= BuildMI (BB, I, I.getDebugLoc (), TII.get (ExtractOp))
1737+ .addDef (BElt)
1738+ .addUse (GR.getSPIRVTypeID (ResType))
1739+ .addUse (I.getOperand (3 ).getReg ())
1740+ .addUse (GR.getOrCreateConstInt (i * 8 , I, EltType, TII))
1741+ .addImm (8 )
1742+ .constrainAllUses (TII, TRI, RBI);
1743+
1744+ // A[i] * B[i]
1745+ Register Mul = MRI->createVirtualRegister (&SPIRV::IDRegClass);
1746+ Result |= BuildMI (BB, I, I.getDebugLoc (), TII.get (SPIRV::OpIMulS))
1747+ .addDef (Mul)
1748+ .addUse (GR.getSPIRVTypeID (ResType))
1749+ .addUse (AElt)
1750+ .addUse (BElt)
1751+ .constrainAllUses (TII, TRI, RBI);
1752+
1753+ // Discard 24 highest-bits so that stored i32 register is i8 equivalent
1754+ Register MaskMul = MRI->createVirtualRegister (&SPIRV::IDRegClass);
1755+ Result |= BuildMI (BB, I, I.getDebugLoc (), TII.get (ExtractOp))
1756+ .addDef (MaskMul)
1757+ .addUse (GR.getSPIRVTypeID (ResType))
1758+ .addUse (Mul)
1759+ .addUse (GR.getOrCreateConstInt (0 , I, EltType, TII))
1760+ .addImm (8 )
1761+ .constrainAllUses (TII, TRI, RBI);
1762+
1763+ // Acc = Acc + A[i] * B[i]
1764+ Register Sum =
1765+ i < 3 ? MRI->createVirtualRegister (&SPIRV::IDRegClass) : ResVReg;
1766+ Result |= BuildMI (BB, I, I.getDebugLoc (), TII.get (SPIRV::OpIAddS))
1767+ .addDef (Sum)
1768+ .addUse (GR.getSPIRVTypeID (ResType))
1769+ .addUse (Acc)
1770+ .addUse (MaskMul)
1771+ .constrainAllUses (TII, TRI, RBI);
1772+
1773+ Acc = Sum;
1774+ }
1775+
1776+ return Result;
1777+ }
1778+
16971779// / Transform saturate(x) to clamp(x, 0.0f, 1.0f) as SPIRV
16981780// / does not have a saturate builtin.
16991781bool SPIRVInstructionSelector::selectSaturate (Register ResVReg,
@@ -2527,6 +2609,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
25272609 case Intrinsic::spv_udot:
25282610 case Intrinsic::spv_sdot:
25292611 return selectIntegerDot (ResVReg, ResType, I);
2612+ case Intrinsic::spv_dot4add_i8packed:
2613+ return selectDot4AddPacked<true >(ResVReg, ResType, I);
25302614 case Intrinsic::spv_all:
25312615 return selectAll (ResVReg, ResType, I);
25322616 case Intrinsic::spv_any:
0 commit comments