@@ -550,20 +550,15 @@ defm CP_ASYNC_BULK_CTA_TO_CLUSTER_SHARED32 : CP_ASYNC_BULK_CTA_TO_CLUSTER<Int32R
550550//------------------------------
551551// Bulk Copy Prefetch Functions
552552//------------------------------
553- multiclass CP_ASYNC_BULK_PREFETCH_INTR {  // multiclass producing two NVPTXInst records: NAME and NAME # _CH
554- defvar prefetch = "cp.async.bulk.prefetch.L2.global";  // mnemonic prefix shared by both asm strings below
555- def NAME: NVPTXInst<(outs),  // plain form: empty (outs) list
556- (ins Int64Regs:$src, Int32Regs:$size),  // inputs: $src (Int64Regs), $size (Int32Regs)
557- !strconcat(prefetch, " [$src], $size;"), []>,  // asm string = prefix + " [$src], $size;"; pattern list is empty
558- Requires<[hasPTX<80>, hasSM<90>]>;  // guarded by the hasPTX<80> and hasSM<90> predicates
559- def NAME # _CH: NVPTXInst<(outs),  // _CH form: same as above plus a $ch operand
560- (ins Int64Regs:$src, Int32Regs:$size, Int64Regs:$ch),  // inputs: $src, $size, and $ch (Int64Regs)
561- !strconcat(prefetch, ".L2::cache_hint [$src], $size, $ch;"), []>,  // prefix + ".L2::cache_hint" qualifier and trailing $ch; empty pattern list
562- Requires<[hasPTX<80>, hasSM<90>]>;  // same predicates as the plain form
563- }
564-
565- defm CP_ASYNC_BULK_PREFETCH : CP_ASYNC_BULK_PREFETCH_INTR;  // instantiates the multiclass with NAME = CP_ASYNC_BULK_PREFETCH, yielding CP_ASYNC_BULK_PREFETCH and CP_ASYNC_BULK_PREFETCH_CH
566-
553+ def CP_ASYNC_BULK_PREFETCH : NVPTXInst<(outs),  // bulk-copy prefetch instruction record (no cache hint); empty (outs) list
554+ (ins Int64Regs:$src, Int32Regs:$size),  // inputs: $src (Int64Regs), $size (Int32Regs)
555+ "cp.async.bulk.prefetch.L2.global [$src], $size;", []>,  // literal asm string; pattern list is empty
556+ Requires<[hasPTX<80>, hasSM<90>]>;  // guarded by the hasPTX<80> and hasSM<90> predicates
557+
558+ def CP_ASYNC_BULK_PREFETCH_CH : NVPTXInst<(outs),  // bulk-copy prefetch instruction record, cache-hint form; empty (outs) list
559+ (ins Int64Regs:$src, Int32Regs:$size, Int64Regs:$ch),  // inputs: $src (Int64Regs), $size (Int32Regs), $ch (Int64Regs)
560+ "cp.async.bulk.prefetch.L2.global.L2::cache_hint [$src], $size, $ch;", []>,  // asm string carries the .L2::cache_hint qualifier and a trailing $ch; empty pattern list
561+ Requires<[hasPTX<80>, hasSM<90>]>;  // guarded by the hasPTX<80> and hasSM<90> predicates
567562//-------------------------------------
568563// TMA Async Bulk Tensor Copy Functions
569564//-------------------------------------
0 commit comments