Skip to content

Commit 6a1daf0

Browse files
committed
address comments
1 parent 02ee37d commit 6a1daf0

File tree

1 file changed

+101
-85
lines changed

1 file changed

+101
-85
lines changed

mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp

Lines changed: 101 additions & 85 deletions
Original file line number | Diff line number | Diff line change
@@ -1657,89 +1657,104 @@ NVVM::IDArgPair CpAsyncBulkTensorReduceOp::getIntrinsicIDAndArgs(
16571657
args.push_back(hasCacheHint ? mt.lookupValue(cacheHint) : i64ZeroValue);
16581658
args.push_back(builder.getInt1(hasCacheHint));
16591659

1660-
const unsigned NI = llvm::Intrinsic::not_intrinsic;
1661-
static constexpr llvm::Intrinsic::ID IDTable[][2][6] = {
1662-
// RedTy::ADD
1663-
{{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_1d,
1664-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_2d,
1665-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_3d,
1666-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_4d,
1667-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_5d},
1668-
{NI, NI, NI,
1669-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_3d,
1670-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_4d,
1671-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_5d}},
1672-
// RedTy::MIN
1673-
{{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_1d,
1674-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_2d,
1675-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_3d,
1676-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_4d,
1677-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_5d},
1678-
{NI, NI, NI,
1679-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_3d,
1680-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_4d,
1681-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_5d}},
1682-
// RedTy::MAX
1683-
{{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_1d,
1684-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_2d,
1685-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_3d,
1686-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_4d,
1687-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_5d},
1688-
{NI, NI, NI,
1689-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_3d,
1690-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_4d,
1691-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_5d}},
1692-
// RedTy::INC
1693-
{{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_1d,
1694-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_2d,
1695-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_3d,
1696-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_4d,
1697-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_5d},
1698-
{NI, NI, NI,
1699-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_3d,
1700-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_4d,
1701-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_5d}},
1702-
// RedTy::DEC
1703-
{{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_1d,
1704-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_2d,
1705-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_3d,
1706-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_4d,
1707-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_5d},
1708-
{NI, NI, NI,
1709-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_3d,
1710-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_4d,
1711-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_5d}},
1712-
// RedTy::AND
1713-
{{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_1d,
1714-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_2d,
1715-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_3d,
1716-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_4d,
1717-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_5d},
1718-
{NI, NI, NI,
1719-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_3d,
1720-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_4d,
1721-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_5d}},
1722-
// RedTy::OR
1723-
{{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_1d,
1724-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_2d,
1725-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_3d,
1726-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_4d,
1727-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_5d},
1728-
{NI, NI, NI,
1729-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_3d,
1730-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_4d,
1731-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_5d}},
1732-
// RedTy::XOR
1733-
{{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_1d,
1734-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_2d,
1735-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_3d,
1736-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_4d,
1737-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_5d},
1738-
{NI, NI, NI,
1739-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_3d,
1740-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_4d,
1741-
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_5d}},
1742-
};
1660+
const llvm::Intrinsic::ID noIntrinsic = llvm::Intrinsic::not_intrinsic;
1661+
1662+
constexpr unsigned numRedKinds = 8; // ADD, MIN, MAX, INC, DEC, AND, OR, XOR
1663+
constexpr unsigned numLayouts = 2; // TILE, IM2COL
1664+
constexpr unsigned maxDim = 5; // 1D to 5D
1665+
using row = std::array<llvm::Intrinsic::ID, maxDim + 1>;
1666+
using layoutTable = std::array<row, numLayouts>;
1667+
using fullTable = std::array<layoutTable, numRedKinds>;
1668+
static constexpr fullTable IDTable{
1669+
{// RedTy::ADD
1670+
{{{{noIntrinsic,
1671+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_1d,
1672+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_2d,
1673+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_3d,
1674+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_4d,
1675+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_5d}},
1676+
{{noIntrinsic, noIntrinsic, noIntrinsic,
1677+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_3d,
1678+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_4d,
1679+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_5d}}}},
1680+
// RedTy::MIN
1681+
{{{{noIntrinsic,
1682+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_1d,
1683+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_2d,
1684+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_3d,
1685+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_4d,
1686+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_5d}},
1687+
{{noIntrinsic, noIntrinsic, noIntrinsic,
1688+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_3d,
1689+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_4d,
1690+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_5d}}}},
1691+
// RedTy::MAX
1692+
{{{{noIntrinsic,
1693+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_1d,
1694+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_2d,
1695+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_3d,
1696+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_4d,
1697+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_5d}},
1698+
{{noIntrinsic, noIntrinsic, noIntrinsic,
1699+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_3d,
1700+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_4d,
1701+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_5d}}}},
1702+
// RedTy::INC
1703+
{{{{noIntrinsic,
1704+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_1d,
1705+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_2d,
1706+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_3d,
1707+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_4d,
1708+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_5d}},
1709+
{{noIntrinsic, noIntrinsic, noIntrinsic,
1710+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_3d,
1711+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_4d,
1712+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_5d}}}},
1713+
// RedTy::DEC
1714+
{{{{noIntrinsic,
1715+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_1d,
1716+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_2d,
1717+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_3d,
1718+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_4d,
1719+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_5d}},
1720+
{{noIntrinsic, noIntrinsic, noIntrinsic,
1721+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_3d,
1722+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_4d,
1723+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_5d}}}},
1724+
// RedTy::AND
1725+
{{{{noIntrinsic,
1726+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_1d,
1727+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_2d,
1728+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_3d,
1729+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_4d,
1730+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_5d}},
1731+
{{noIntrinsic, noIntrinsic, noIntrinsic,
1732+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_3d,
1733+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_4d,
1734+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_5d}}}},
1735+
// RedTy::OR
1736+
{{{{noIntrinsic,
1737+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_1d,
1738+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_2d,
1739+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_3d,
1740+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_4d,
1741+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_5d}},
1742+
{{noIntrinsic, noIntrinsic, noIntrinsic,
1743+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_3d,
1744+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_4d,
1745+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_5d}}}},
1746+
// RedTy::XOR
1747+
{{{{noIntrinsic,
1748+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_1d,
1749+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_2d,
1750+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_3d,
1751+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_4d,
1752+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_5d}},
1753+
{{noIntrinsic, noIntrinsic, noIntrinsic,
1754+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_3d,
1755+
llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_4d,
1756+
llvm::Intrinsic::
1757+
nvvm_cp_async_bulk_tensor_reduce_xor_im2col_5d}}}}}};
17431758

17441759
static_assert(getMaxEnumValForTMAReduxKind() == std::size(IDTable) - 1,
17451760
"TMAReduxKinds must match number of rows in IDTable");
@@ -1748,8 +1763,9 @@ NVVM::IDArgPair CpAsyncBulkTensorReduceOp::getIntrinsicIDAndArgs(
17481763
size_t mode = static_cast<size_t>(thisOp.getMode());
17491764
size_t dim = thisOp.getCoordinates().size();
17501765
llvm::Intrinsic::ID intrinsicID = IDTable[redKind][mode][dim];
1751-
if (intrinsicID == llvm::Intrinsic::not_intrinsic)
1752-
llvm_unreachable("Invalid intrinsic for CpAsyncBulkTensorReduceOp.");
1766+
1767+
assert(intrinsicID != noIntrinsic &&
1768+
"Invalid intrinsic for CpAsyncBulkTensorReduceOp.");
17531769

17541770
return {intrinsicID, std::move(args)};
17551771
}

0 commit comments

Comments
 (0)