@@ -4175,6 +4175,10 @@ bool NVPTXScopes::empty() const { return Scopes.size() == 0; }
41754175 return CP_ASYNC_BULK_TENSOR_OPCODE (G2S, dim, mode, ); \
41764176 }()
41774177
// Expands to one of two NVPTX opcode enumerators for the given `dim` / `mode`
// token pair (e.g. `3D`, `TILE`):
//   NVPTX::CP_ASYNC_BULK_TENSOR_PREFETCH_<dim>_<mode>_CH  when IsCacheHint is true
//   NVPTX::CP_ASYNC_BULK_TENSOR_PREFETCH_<dim>_<mode>     otherwise
// A boolean named `IsCacheHint` must be in scope at the point of expansion.
// NOTE: the '(' must directly follow the macro name; with whitespace in
// between, the preprocessor defines an object-like macro and `dim` / `mode`
// are never substituted.
#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(dim, mode)                    \
  (IsCacheHint ? NVPTX::CP_ASYNC_BULK_TENSOR_PREFETCH_##dim##_##mode##_CH      \
               : NVPTX::CP_ASYNC_BULK_TENSOR_PREFETCH_##dim##_##mode)
4181+
41784182static unsigned GetCpAsyncBulkTensorS2GOpcode (size_t Dim, bool IsShared32,
41794183 bool IsCacheHint, bool IsIm2Col) {
41804184 if (IsIm2Col) {
@@ -4242,6 +4246,55 @@ static unsigned GetCpAsyncBulkTensorG2SOpcode(size_t Dim, bool IsShared32,
42424246 }
42434247}
42444248
4249+ static unsigned GetCpAsyncBulkTensorPrefetchOpcode (size_t Dim, bool IsCacheHint,
4250+ bool IsIm2Col) {
4251+ if (IsIm2Col) {
4252+ switch (Dim) {
4253+ case 3 :
4254+ return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH (3D, IM2COL);
4255+ case 4 :
4256+ return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH (4D, IM2COL);
4257+ case 5 :
4258+ return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH (5D, IM2COL);
4259+ default :
4260+ llvm_unreachable (" Invalid Dimension in im2col mode for "
4261+ " GetCpAsyncBulkTensorPrefetchOpcode." );
4262+ }
4263+ } else {
4264+ switch (Dim) {
4265+ case 1 :
4266+ return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH (1D, TILE);
4267+ case 2 :
4268+ return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH (2D, TILE);
4269+ case 3 :
4270+ return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH (3D, TILE);
4271+ case 4 :
4272+ return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH (4D, TILE);
4273+ case 5 :
4274+ return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH (5D, TILE);
4275+ default :
4276+ llvm_unreachable (" Invalid Dimension in tile mode for "
4277+ " GetCpAsyncBulkTensorPrefetchOpcode." );
4278+ }
4279+ }
4280+ }
4281+
4282+ static size_t GetDimsFromIntrinsic (unsigned IID) {
4283+ switch (IID) {
4284+ case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
4285+ case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_3d:
4286+ return 3 ;
4287+ case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
4288+ case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_4d:
4289+ return 4 ;
4290+ case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
4291+ case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_5d:
4292+ return 5 ;
4293+ default :
4294+ llvm_unreachable (" Invalid im2col intrinsic in GetDimsFromIntrinsic." );
4295+ }
4296+ }
4297+
42454298void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorG2SCommon (SDNode *N,
42464299 bool IsIm2Col) {
42474300 // We have {Chain, Intrinsic-ID} followed by the actual intrisic args:
@@ -4250,21 +4303,8 @@ void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorG2SCommon(SDNode *N,
42504303 // multicast_flag, cache_hint_flag}
42514304 // NumOperands = {Chain, IID} + {Actual intrinsic args}
42524305 // = {2} + {7 + dims + im2col_offsets}
4253- auto getDimsFromIntrinsic = [](unsigned IID) {
4254- switch (IID) {
4255- case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
4256- return 3 ;
4257- case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
4258- return 4 ;
4259- case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
4260- return 5 ;
4261- default :
4262- llvm_unreachable (
4263- " Invalid im2col intrinsic in SelectCpAsyncBulkTensorG2SCommon." );
4264- }
4265- };
42664306 size_t NumOps = N->getNumOperands ();
4267- size_t NumDims = IsIm2Col ? getDimsFromIntrinsic (N->getConstantOperandVal (1 ))
4307+ size_t NumDims = IsIm2Col ? GetDimsFromIntrinsic (N->getConstantOperandVal (1 ))
42684308 : (NumOps - 9 );
42694309 // Offsets is always 'NumDims - 2' and only for im2col mode
42704310 size_t NumOffsets = IsIm2Col ? (NumDims - 2 ) : 0 ;
@@ -4316,6 +4356,30 @@ void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorS2GCommon(SDNode *N,
43164356 ReplaceNode (N, CurDAG->getMachineNode (Opcode, DL, N->getVTList (), Ops));
43174357}
43184358
4359+ void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorPrefetchCommon (SDNode *N,
4360+ bool IsIm2Col) {
4361+ // We have {Chain, Intrinsic-ID} followed by the actual intrisic args:
4362+ // {src, dims{d0...dN}, im2col_offsets{dims-2}
4363+ // cache_hint, cache_hint_flag}
4364+ // NumOperands = {Chain, IID} + {Actual intrinsic args}
4365+ // = {2} + {3 + dims + im2col_offsets}
4366+ size_t NumOps = N->getNumOperands ();
4367+ size_t NumDims = IsIm2Col ? GetDimsFromIntrinsic (N->getConstantOperandVal (1 ))
4368+ : (NumOps - 5 );
4369+ // Offsets is always 'NumDims - 2' and only for im2col mode
4370+ size_t NumOffsets = IsIm2Col ? (NumDims - 2 ) : 0 ;
4371+ bool IsCacheHint = N->getConstantOperandVal (NumOps - 1 ) == 1 ;
4372+ size_t NumArgs = NumDims + NumOffsets + (IsCacheHint ? 2 : 1 );
4373+
4374+ SDLoc DL (N);
4375+ SmallVector<SDValue, 12 > Ops (N->ops ().slice (2 , NumArgs));
4376+ Ops.push_back (N->getOperand (0 )); // Chain operand
4377+
4378+ unsigned Opcode =
4379+ GetCpAsyncBulkTensorPrefetchOpcode (NumDims, IsCacheHint, IsIm2Col);
4380+ ReplaceNode (N, CurDAG->getMachineNode (Opcode, DL, N->getVTList (), Ops));
4381+ }
4382+
43194383bool NVPTXDAGToDAGISel::tryIntrinsicVoid (SDNode *N) {
43204384 unsigned IID = N->getConstantOperandVal (1 );
43214385 switch (IID) {
@@ -4345,5 +4409,17 @@ bool NVPTXDAGToDAGISel::tryIntrinsicVoid(SDNode *N) {
43454409 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
43464410 SelectCpAsyncBulkTensorG2SCommon (N, /* IsIm2Col=*/ true );
43474411 return true ;
4412+ case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_1d:
4413+ case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_2d:
4414+ case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_3d:
4415+ case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_4d:
4416+ case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_5d:
4417+ SelectCpAsyncBulkTensorPrefetchCommon (N);
4418+ return true ;
4419+ case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_3d:
4420+ case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_4d:
4421+ case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_5d:
4422+ SelectCpAsyncBulkTensorPrefetchCommon (N, /* IsIm2Col=*/ true );
4423+ return true ;
43484424 }
43494425}
0 commit comments