@@ -1657,89 +1657,104 @@ NVVM::IDArgPair CpAsyncBulkTensorReduceOp::getIntrinsicIDAndArgs(
16571657 args.push_back (hasCacheHint ? mt.lookupValue (cacheHint) : i64ZeroValue);
16581658 args.push_back (builder.getInt1 (hasCacheHint));
16591659
1660- const unsigned NI = llvm::Intrinsic::not_intrinsic;
1661- static constexpr llvm::Intrinsic::ID IDTable[][2 ][6 ] = {
1662- // RedTy::ADD
1663- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_1d,
1664- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_2d,
1665- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_3d,
1666- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_4d,
1667- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_5d},
1668- {NI, NI, NI,
1669- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_3d,
1670- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_4d,
1671- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_5d}},
1672- // RedTy::MIN
1673- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_1d,
1674- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_2d,
1675- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_3d,
1676- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_4d,
1677- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_5d},
1678- {NI, NI, NI,
1679- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_3d,
1680- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_4d,
1681- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_5d}},
1682- // RedTy::MAX
1683- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_1d,
1684- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_2d,
1685- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_3d,
1686- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_4d,
1687- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_5d},
1688- {NI, NI, NI,
1689- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_3d,
1690- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_4d,
1691- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_5d}},
1692- // RedTy::INC
1693- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_1d,
1694- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_2d,
1695- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_3d,
1696- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_4d,
1697- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_5d},
1698- {NI, NI, NI,
1699- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_3d,
1700- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_4d,
1701- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_5d}},
1702- // RedTy::DEC
1703- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_1d,
1704- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_2d,
1705- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_3d,
1706- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_4d,
1707- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_5d},
1708- {NI, NI, NI,
1709- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_3d,
1710- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_4d,
1711- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_5d}},
1712- // RedTy::AND
1713- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_1d,
1714- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_2d,
1715- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_3d,
1716- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_4d,
1717- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_5d},
1718- {NI, NI, NI,
1719- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_3d,
1720- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_4d,
1721- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_5d}},
1722- // RedTy::OR
1723- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_1d,
1724- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_2d,
1725- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_3d,
1726- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_4d,
1727- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_5d},
1728- {NI, NI, NI,
1729- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_3d,
1730- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_4d,
1731- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_5d}},
1732- // RedTy::XOR
1733- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_1d,
1734- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_2d,
1735- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_3d,
1736- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_4d,
1737- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_5d},
1738- {NI, NI, NI,
1739- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_3d,
1740- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_4d,
1741- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_5d}},
1742- };
1660+ const llvm::Intrinsic::ID noIntrinsic = llvm::Intrinsic::not_intrinsic;
1661+
1662+ constexpr unsigned numRedKinds = 8 ; // ADD, MIN, MAX, INC, DEC, AND, OR, XOR
1663+ constexpr unsigned numLayouts = 2 ; // TILE, IM2COL
1664+ constexpr unsigned maxDim = 5 ; // 1D to 5D
1665+ using row = std::array<llvm::Intrinsic::ID, maxDim + 1 >;
1666+ using layoutTable = std::array<row, numLayouts>;
1667+ using fullTable = std::array<layoutTable, numRedKinds>;
1668+ static constexpr fullTable IDTable{
1669+ {// RedTy::ADD
1670+ {{{{noIntrinsic,
1671+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_1d,
1672+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_2d,
1673+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_3d,
1674+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_4d,
1675+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_5d}},
1676+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1677+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_3d,
1678+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_4d,
1679+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_5d}}}},
1680+ // RedTy::MIN
1681+ {{{{noIntrinsic,
1682+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_1d,
1683+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_2d,
1684+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_3d,
1685+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_4d,
1686+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_5d}},
1687+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1688+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_3d,
1689+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_4d,
1690+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_5d}}}},
1691+ // RedTy::MAX
1692+ {{{{noIntrinsic,
1693+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_1d,
1694+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_2d,
1695+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_3d,
1696+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_4d,
1697+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_5d}},
1698+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1699+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_3d,
1700+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_4d,
1701+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_5d}}}},
1702+ // RedTy::INC
1703+ {{{{noIntrinsic,
1704+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_1d,
1705+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_2d,
1706+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_3d,
1707+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_4d,
1708+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_5d}},
1709+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1710+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_3d,
1711+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_4d,
1712+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_5d}}}},
1713+ // RedTy::DEC
1714+ {{{{noIntrinsic,
1715+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_1d,
1716+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_2d,
1717+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_3d,
1718+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_4d,
1719+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_5d}},
1720+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1721+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_3d,
1722+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_4d,
1723+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_5d}}}},
1724+ // RedTy::AND
1725+ {{{{noIntrinsic,
1726+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_1d,
1727+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_2d,
1728+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_3d,
1729+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_4d,
1730+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_5d}},
1731+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1732+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_3d,
1733+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_4d,
1734+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_5d}}}},
1735+ // RedTy::OR
1736+ {{{{noIntrinsic,
1737+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_1d,
1738+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_2d,
1739+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_3d,
1740+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_4d,
1741+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_5d}},
1742+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1743+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_3d,
1744+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_4d,
1745+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_5d}}}},
1746+ // RedTy::XOR
1747+ {{{{noIntrinsic,
1748+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_1d,
1749+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_2d,
1750+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_3d,
1751+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_4d,
1752+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_5d}},
1753+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1754+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_3d,
1755+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_4d,
1756+ llvm::Intrinsic::
1757+ nvvm_cp_async_bulk_tensor_reduce_xor_im2col_5d}}}}}};
17431758
17441759 static_assert (getMaxEnumValForTMAReduxKind () == std::size (IDTable) - 1 ,
17451760 " TMAReduxKinds must match number of rows in IDTable" );
@@ -1748,8 +1763,9 @@ NVVM::IDArgPair CpAsyncBulkTensorReduceOp::getIntrinsicIDAndArgs(
17481763 size_t mode = static_cast <size_t >(thisOp.getMode ());
17491764 size_t dim = thisOp.getCoordinates ().size ();
17501765 llvm::Intrinsic::ID intrinsicID = IDTable[redKind][mode][dim];
1751- if (intrinsicID == llvm::Intrinsic::not_intrinsic)
1752- llvm_unreachable (" Invalid intrinsic for CpAsyncBulkTensorReduceOp." );
1766+
1767+ assert (intrinsicID != noIntrinsic &&
1768+ " Invalid intrinsic for CpAsyncBulkTensorReduceOp." );
17531769
17541770 return {intrinsicID, std::move (args)};
17551771}
0 commit comments