@@ -1657,89 +1657,104 @@ NVVM::IDArgPair CpAsyncBulkTensorReduceOp::getIntrinsicIDAndArgs(
1657
1657
args.push_back (hasCacheHint ? mt.lookupValue (cacheHint) : i64ZeroValue);
1658
1658
args.push_back (builder.getInt1 (hasCacheHint));
1659
1659
1660
- const unsigned NI = llvm::Intrinsic::not_intrinsic;
1661
- static constexpr llvm::Intrinsic::ID IDTable[][2 ][6 ] = {
1662
- // RedTy::ADD
1663
- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_1d,
1664
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_2d,
1665
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_3d,
1666
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_4d,
1667
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_5d},
1668
- {NI, NI, NI,
1669
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_3d,
1670
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_4d,
1671
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_5d}},
1672
- // RedTy::MIN
1673
- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_1d,
1674
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_2d,
1675
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_3d,
1676
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_4d,
1677
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_5d},
1678
- {NI, NI, NI,
1679
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_3d,
1680
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_4d,
1681
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_5d}},
1682
- // RedTy::MAX
1683
- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_1d,
1684
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_2d,
1685
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_3d,
1686
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_4d,
1687
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_5d},
1688
- {NI, NI, NI,
1689
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_3d,
1690
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_4d,
1691
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_5d}},
1692
- // RedTy::INC
1693
- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_1d,
1694
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_2d,
1695
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_3d,
1696
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_4d,
1697
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_5d},
1698
- {NI, NI, NI,
1699
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_3d,
1700
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_4d,
1701
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_5d}},
1702
- // RedTy::DEC
1703
- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_1d,
1704
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_2d,
1705
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_3d,
1706
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_4d,
1707
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_5d},
1708
- {NI, NI, NI,
1709
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_3d,
1710
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_4d,
1711
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_5d}},
1712
- // RedTy::AND
1713
- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_1d,
1714
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_2d,
1715
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_3d,
1716
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_4d,
1717
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_5d},
1718
- {NI, NI, NI,
1719
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_3d,
1720
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_4d,
1721
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_5d}},
1722
- // RedTy::OR
1723
- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_1d,
1724
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_2d,
1725
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_3d,
1726
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_4d,
1727
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_5d},
1728
- {NI, NI, NI,
1729
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_3d,
1730
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_4d,
1731
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_5d}},
1732
- // RedTy::XOR
1733
- {{NI, llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_1d,
1734
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_2d,
1735
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_3d,
1736
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_4d,
1737
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_5d},
1738
- {NI, NI, NI,
1739
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_3d,
1740
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_4d,
1741
- llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_5d}},
1742
- };
1660
+ const llvm::Intrinsic::ID noIntrinsic = llvm::Intrinsic::not_intrinsic;
1661
+
1662
+ constexpr unsigned numRedKinds = 8 ; // ADD, MIN, MAX, INC, DEC, AND, OR, XOR
1663
+ constexpr unsigned numLayouts = 2 ; // TILE, IM2COL
1664
+ constexpr unsigned maxDim = 5 ; // 1D to 5D
1665
+ using row = std::array<llvm::Intrinsic::ID, maxDim + 1 >;
1666
+ using layoutTable = std::array<row, numLayouts>;
1667
+ using fullTable = std::array<layoutTable, numRedKinds>;
1668
+ static constexpr fullTable IDTable{
1669
+ {// RedTy::ADD
1670
+ {{{{noIntrinsic,
1671
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_1d,
1672
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_2d,
1673
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_3d,
1674
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_4d,
1675
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_5d}},
1676
+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1677
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_3d,
1678
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_4d,
1679
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_5d}}}},
1680
+ // RedTy::MIN
1681
+ {{{{noIntrinsic,
1682
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_1d,
1683
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_2d,
1684
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_3d,
1685
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_4d,
1686
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_5d}},
1687
+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1688
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_3d,
1689
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_4d,
1690
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_5d}}}},
1691
+ // RedTy::MAX
1692
+ {{{{noIntrinsic,
1693
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_1d,
1694
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_2d,
1695
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_3d,
1696
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_4d,
1697
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_5d}},
1698
+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1699
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_3d,
1700
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_4d,
1701
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_5d}}}},
1702
+ // RedTy::INC
1703
+ {{{{noIntrinsic,
1704
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_1d,
1705
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_2d,
1706
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_3d,
1707
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_4d,
1708
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_5d}},
1709
+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1710
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_3d,
1711
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_4d,
1712
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_5d}}}},
1713
+ // RedTy::DEC
1714
+ {{{{noIntrinsic,
1715
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_1d,
1716
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_2d,
1717
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_3d,
1718
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_4d,
1719
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_5d}},
1720
+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1721
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_3d,
1722
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_4d,
1723
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_5d}}}},
1724
+ // RedTy::AND
1725
+ {{{{noIntrinsic,
1726
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_1d,
1727
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_2d,
1728
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_3d,
1729
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_4d,
1730
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_5d}},
1731
+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1732
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_3d,
1733
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_4d,
1734
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_5d}}}},
1735
+ // RedTy::OR
1736
+ {{{{noIntrinsic,
1737
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_1d,
1738
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_2d,
1739
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_3d,
1740
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_4d,
1741
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_5d}},
1742
+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1743
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_3d,
1744
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_4d,
1745
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_5d}}}},
1746
+ // RedTy::XOR
1747
+ {{{{noIntrinsic,
1748
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_1d,
1749
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_2d,
1750
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_3d,
1751
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_4d,
1752
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_5d}},
1753
+ {{noIntrinsic, noIntrinsic, noIntrinsic,
1754
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_3d,
1755
+ llvm::Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_4d,
1756
+ llvm::Intrinsic::
1757
+ nvvm_cp_async_bulk_tensor_reduce_xor_im2col_5d}}}}}};
1743
1758
1744
1759
static_assert (getMaxEnumValForTMAReduxKind () == std::size (IDTable) - 1 ,
1745
1760
" TMAReduxKinds must match number of rows in IDTable" );
@@ -1748,8 +1763,9 @@ NVVM::IDArgPair CpAsyncBulkTensorReduceOp::getIntrinsicIDAndArgs(
1748
1763
size_t mode = static_cast <size_t >(thisOp.getMode ());
1749
1764
size_t dim = thisOp.getCoordinates ().size ();
1750
1765
llvm::Intrinsic::ID intrinsicID = IDTable[redKind][mode][dim];
1751
- if (intrinsicID == llvm::Intrinsic::not_intrinsic)
1752
- llvm_unreachable (" Invalid intrinsic for CpAsyncBulkTensorReduceOp." );
1766
+
1767
+ assert (intrinsicID != noIntrinsic &&
1768
+ " Invalid intrinsic for CpAsyncBulkTensorReduceOp." );
1753
1769
1754
1770
return {intrinsicID, std::move (args)};
1755
1771
}
0 commit comments