Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,8 +590,12 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
}
case Intrinsic::nvvm_prefetch_tensormap: {
IRBuilder<> Builder(II);
return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
NewV);
const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
if (NewAS == NVPTXAS::ADDRESS_SPACE_CONST ||
NewAS == NVPTXAS::ADDRESS_SPACE_PARAM)
return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
NewV);
return nullptr;
}
}
return nullptr;
Expand Down
35 changes: 31 additions & 4 deletions llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ target triple = "nvptx64-unknown-unknown"
define void @test_infer_const_from_cast() {
; INFER-LABEL: @test_infer_const_from_cast
; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
; BOTH: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
; PTX-LABEL: .visible .func test_infer_const_from_cast(
; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
Expand Down Expand Up @@ -69,12 +68,40 @@ entry:
%cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
%cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
%cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
call void @llvm.nvvm.prefetch.tensormap(ptr %cast3)
call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
ret void
}

; Kernel Function Test
; Cast from Param space to Generic
;
; The kernel receives a pointer in addrspace(101), casts it to a generic
; pointer, and prefetches through the generic (.p0) intrinsic overload.
; The INFER checks expect the call to be rewritten to the .p101 overload
; operating directly on %param_ptr; the PTX checks expect the
; param-qualified prefetch instruction to be emitted.
define ptx_kernel void @test_param_to_generic_cast_kernel(ptr addrspace(101) %param_ptr) {
; INFER-LABEL: @test_param_to_generic_cast_kernel
; INFER: call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
; PTX-LABEL: .visible .entry test_param_to_generic_cast_kernel(
; PTX: prefetch.param.tensormap [%rd{{[0-9]+}}];
entry:
; Drop the address-space information by casting to a generic pointer.
%cast = addrspacecast ptr addrspace(101) %param_ptr to ptr
; Prefetch via the generic overload; the checks above expect the original
; address space to be recovered from the cast.
call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast)
ret void
}

; Kernel Function Test
; Multiple casts in sequence
;
; Starts from the addrspace(4) global @constant_tensormap and round-trips it
; through generic -> addrspace(4) -> generic before prefetching via the
; generic (.p0) overload. The INFER checks expect the whole cast chain to be
; folded away so that the .p4 overload is called on @constant_tensormap
; directly. The PTX checks expect the symbol address to be materialised,
; converted with cvta.const, and then prefetched with the unqualified
; prefetch.tensormap form.
define ptx_kernel void @test_infer_through_multiple_casts_kernel() {
; INFER-LABEL: @test_infer_through_multiple_casts_kernel
; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
; PTX-LABEL: .visible .entry test_infer_through_multiple_casts_kernel(
; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
; PTX: prefetch.tensormap [%rd{{[0-9]+}}];
entry:
; addrspace(4) -> generic
%cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
; generic -> addrspace(4)
%cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
; addrspace(4) -> generic again; this is the pointer actually prefetched.
%cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
ret void
}

declare void @llvm.nvvm.prefetch.tensormap.p0(ptr)
declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4))
declare void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101))


38 changes: 37 additions & 1 deletion llvm/test/CodeGen/NVPTX/prefetch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,40 @@ define void @prefetch_param_tensormap(ptr addrspace(101) %param_ptr) {
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
ret void
}
}

; Kernel variant of the generic tensormap prefetch test: the kernel takes a
; generic pointer argument and prefetches it through the .p0 overload. The
; checks pin the exact PTX body: one 64-bit register, a load of the kernel
; parameter, and an unqualified prefetch.tensormap on that register.
define ptx_kernel void @prefetch_generic_tensormap_kernel(ptr %ptr) {
; CHECK-PTX64-LABEL: prefetch_generic_tensormap_kernel(
; CHECK-PTX64: {
; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_generic_tensormap_kernel_param_0];
; CHECK-PTX64-NEXT: prefetch.tensormap [%rd1];
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr)
ret void
}

; Kernel variant of the param-space tensormap prefetch test: the kernel takes
; an addrspace(101) pointer argument and prefetches it through the .p101
; overload. The checks pin the exact PTX body: one 64-bit register, a load of
; the kernel parameter, and a prefetch.param.tensormap on that register.
define ptx_kernel void @prefetch_param_tensormap_kernel(ptr addrspace(101) %param_ptr) {
; CHECK-PTX64-LABEL: prefetch_param_tensormap_kernel(
; CHECK-PTX64: {
; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_param_tensormap_kernel_param_0];
; CHECK-PTX64-NEXT: prefetch.param.tensormap [%rd1];
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
ret void
}

; Kernel whose argument is a 64-byte, 64-byte-aligned byval aggregate marked
; with the "nvvm.grid_constant" attribute. The argument is prefetched through
; the generic (.p0) overload, and the checks expect the unqualified
; prefetch.tensormap form. The address-operand pattern is deliberately loose:
; it accepts either a name starting with %SP or a numbered %rd register.
; NOTE(review): presumably this allows for either a stack-based or a
; register-based lowering of the byval argument - confirm against llc output.
define ptx_kernel void @prefetch_grid_const_tensormap(ptr byval([64 x i8]) align 64 "nvvm.grid_constant" %ptr) {
; CHECK-PTX64-LABEL: .visible .entry prefetch_grid_const_tensormap(
; CHECK-PTX64: prefetch.tensormap [%{{(SP|rd[0-9]+).*}}];
; CHECK-PTX64: ret;

entry:
call void @llvm.nvvm.prefetch.tensormap.p0(ptr addrspace(0) %ptr)
ret void
}