@@ -2648,8 +2648,7 @@ def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc"> {
26482648 the amount specified by `nCols` and writes the destination
26492649 address to the `addr` argument. The `nCols` operand specifies the
26502650 number of columns to be allocated, and it must be a power-of-two.
2651- [For more information, refer to the PTX ISA]
2652- (https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
2651+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
26532652 }];
26542653
26552654 let arguments = (ins
@@ -2679,8 +2678,7 @@ def NVVM_Tcgen05DeallocOp : NVVM_Op<"tcgen05.dealloc"> {
26792678 specified by `tmemAddr`, which must be from a previous tensor
26802679 memory allocation. The `nCols` operand specifies the number
26812680 of columns to be de-allocated, and it must be a power-of-two.
2682- [For more information, refer to the PTX ISA]
2683- (https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
2681+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
26842682 }];
26852683
26862684 let arguments = (ins LLVM_PointerTensor:$taddr, I32:$nCols,
@@ -2708,8 +2706,7 @@ def NVVM_Tcgen05RelinquishAllocPermitOp : NVVM_Op<"tcgen05.relinquish_alloc_perm
27082706 of the executing thread is relinquishing the right to allocate
27092707 Tensor Memory. So, it is illegal for a CTA to perform `tcgen05.alloc`
27102708 after any of its constituent threads execute `tcgen05.relinquish_alloc_permit`.
2711- [For more information, refer to the PTX ISA]
2712- (https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
2709+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
27132710 }];
27142711
27152712 let arguments = (ins
@@ -2733,8 +2730,7 @@ def NVVM_Tcgen05FenceOp : NVVM_Op<"tcgen05.fence"> {
27332730 The `tcgen05.fence<after>` orders all subsequent async tcgen05 operations
27342731 with respect to the prior tcgen05 and execution ordering operations.
27352732
2736- [For more information refer to the PTX ISA]
2737- (https://docs.nvidia.com/cuda/parallel-thread-execution/#tensorcore-5th-generation-instructions-tcgen05-fence)
2733+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tensorcore-5th-generation-instructions-tcgen05-fence)
27382734 }];
27392735
27402736 let arguments = (ins Tcgen05FenceKindAttr:$kind);
@@ -2756,8 +2752,7 @@ def NVVM_Tcgen05WaitOp : NVVM_Op<"tcgen05.wait"> {
27562752 have completed. Similarly, the `tcgen05.wait<store>` causes the executing
27572753 thread to block until all prior `tcgen05.st` operations issued by the
27582754 executing thread have completed.
2759- [For more information refer PTX ISA]
2760- (https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instructions-tcgen05-wait)
2755+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instructions-tcgen05-wait)
27612756 }];
27622757
27632758 let arguments = (ins Tcgen05WaitKindAttr:$kind);
@@ -2782,8 +2777,7 @@ def NVVM_Tcgen05CommitOp : NVVM_Op<"tcgen05.commit"> {
27822777 when present, specifies the destination CTAs in the cluster such
27832778 that each bit position in the 16-bit `multicastMask` operand
27842779 corresponds to the `nvvm.read.ptx.sreg.ctaid` of the destination CTA.
2785- [For more information refer PTX ISA]
2786- (https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen-async-sync-operations-commit)
2780+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen-async-sync-operations-commit)
27872781 }];
27882782
27892783 let arguments = (ins
0 commit comments