@@ -1756,7 +1756,7 @@ bool GenXLowering::lowerBoolVectorSelect(SelectInst *Inst) {
17561756 * Return: whether any change was made, and thus the current instruction
17571757 * is now marked for erasing
17581758 *
1759- * We handle three cases:
1759+ * We handle four cases:
17601760 *
17611761 * 1. A slice of the vector, which can be turned into rdpredregion.
17621762 *
@@ -1765,6 +1765,10 @@ bool GenXLowering::lowerBoolVectorSelect(SelectInst *Inst) {
17651765 * result of a cmp then we can splat the cmp as an optimization.
17661766 *
17671767 * 3. An unslice of the vector, which can be turned into wrpredregion.
1768+ *
1769+ * 4. General case. Like in the splat case we convert the inputs via select;
1770+ * the shuffled result is then converted back to a vector of i1 via icmp ne
1771+ * against zero. Converted vectors are then handled by lowerShuffleToMove.
17681772 */
17691773bool GenXLowering::lowerBoolShuffle (ShuffleVectorInst *SI) {
17701774 ShuffleVectorAnalyzer SVA (SI);
@@ -1809,10 +1813,30 @@ bool GenXLowering::lowerBoolShuffle(ShuffleVectorInst *SI) {
18091813 if (SVA.isReplicatedSlice ())
18101814 return false ;
18111815
1812- // No other cases handled.
1813- SI->getContext ().emitError (
1814- SI, " general bool shuffle vector instruction not implemented" );
1815- return false ;
1816+ // 4. General case.
1817+
1818+ // The idea is to convert input i1 vector to i16 vector via select,
1819+ // then do a shufflevector lowering for non-bool case
1820+ // and convert back to i1 vector via icmp instruction.
1821+
1822+ IRBuilder<> B (SI);
1823+ unsigned WidthInput =
1824+ cast<VectorType>(SI->getOperand (0 )->getType ())->getNumElements ();
1825+ unsigned WidthResult = cast<VectorType>(SI->getType ())->getNumElements ();
1826+ Constant *C1 = ConstantVector::getSplat (IGCLLVM::getElementCount (WidthInput),
1827+ B.getInt16 (1 ));
1828+ Constant *C0 = ConstantVector::getSplat (IGCLLVM::getElementCount (WidthInput),
1829+ B.getInt16 (0 ));
1830+ Value *V1 = B.CreateSelect (SI->getOperand (0 ), C1, C0);
1831+ Value *V2 = B.CreateSelect (SI->getOperand (1 ), C1, C0);
1832+ Value *SI1 = B.CreateShuffleVector (V1, V2, SI->getMask (), SI->getName ());
1833+ Constant *C2 = ConstantVector::getSplat (IGCLLVM::getElementCount (WidthResult),
1834+ B.getInt16 (0 ));
1835+ Value *Result = B.CreateICmpNE (SI1, C2);
1836+ SI->replaceAllUsesWith (Result);
1837+ ToErase.push_back (SI);
1838+
1839+ return true ;
18161840}
18171841
18181842/* **********************************************************************
@@ -2022,7 +2046,7 @@ template <typename Iter> Iter skipUndefs(Iter First, Iter Last) {
20222046}
20232047
20242048/* **********************************************************************
2025- * lowerShuffleToMove : lower a ShuffleInst (element type not i1) to a
2049+ * lowerShuffleToMove : lower a ShuffleInst (element type is not i1) to a
20262050 * sequence of rd/wrregion intrinsics
20272051 */
20282052void GenXLowering::lowerShuffleToMove (ShuffleVectorInst *SI) {
@@ -2052,8 +2076,7 @@ void GenXLowering::lowerShuffleToMove(ShuffleVectorInst *SI) {
20522076 std::transform (
20532077 RdRegions.begin (), RdRegions.end (), std::back_inserter (RdRegionInsts),
20542078 [SI](ShuffleVectorAnalyzer::OperandRegionInfo &OpRegion) -> Value * {
2055- if (cast<VectorType>(OpRegion.Op ->getType ())->getNumElements () ==
2056- OpRegion.R .NumElements )
2079+ if (OpRegion.R .isWhole (OpRegion.Op ->getType ()))
20572080 return OpRegion.Op ;
20582081 return OpRegion.R .createRdRegion (
20592082 OpRegion.Op , SI->getName () + " .shuffle.rd" , SI, SI->getDebugLoc ());
0 commit comments