Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,8 +590,12 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
}
case Intrinsic::nvvm_prefetch_tensormap: {
IRBuilder<> Builder(II);
return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
NewV);
const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
if (NewAS == NVPTXAS::ADDRESS_SPACE_CONST ||
NewAS == NVPTXAS::ADDRESS_SPACE_PARAM)
return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
NewV);
return nullptr;
}
}
return nullptr;
Expand Down
48 changes: 44 additions & 4 deletions llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ target triple = "nvptx64-unknown-unknown"
define void @test_infer_const_from_cast() {
; INFER-LABEL: @test_infer_const_from_cast
; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
; BOTH: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
; PTX-LABEL: .visible .func test_infer_const_from_cast(
; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
Expand Down Expand Up @@ -69,12 +68,53 @@ entry:
%cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
%cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
%cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
call void @llvm.nvvm.prefetch.tensormap(ptr %cast3)
call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
ret void
}

; Kernel Function Test
; Cast from Const space to Generic
;
; A ptx_kernel receives a pointer in address space 4, casts it to a generic
; pointer, and prefetches through the generic (.p0) overload of the intrinsic.
; The INFER expectations require the call to end up as the .p4 overload applied
; directly to %const_ptr; the PTX expectations require the const-qualified
; prefetch instruction.
; NOTE(review): the RUN lines that define the INFER and PTX prefixes are not
; visible in this hunk - confirm which pipeline each prefix exercises.
define ptx_kernel void @test_const_to_generic_cast_kernel(ptr addrspace(4) %const_ptr) {
; INFER-LABEL: @test_const_to_generic_cast_kernel
; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr)
; PTX-LABEL: .visible .entry test_const_to_generic_cast_kernel(
; PTX: prefetch.const.tensormap [%rd{{[0-9]+}}];
entry:
; Generic view of the const-space argument; the expectations above show the
; prefetch being retargeted at the original addrspace(4) pointer instead.
%cast = addrspacecast ptr addrspace(4) %const_ptr to ptr
call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast)
ret void
}

; Kernel Function Test
; Cast from Param space to Generic
;
; A ptx_kernel receives a pointer in address space 101, casts it to a generic
; pointer, and prefetches through the generic (.p0) overload of the intrinsic.
; The INFER expectations require the call to end up as the .p101 overload
; applied directly to %param_ptr; the PTX expectations require the
; param-qualified prefetch instruction.
; NOTE(review): the RUN lines that define the INFER and PTX prefixes are not
; visible in this hunk - confirm which pipeline each prefix exercises.
define ptx_kernel void @test_param_to_generic_cast_kernel(ptr addrspace(101) %param_ptr) {
; INFER-LABEL: @test_param_to_generic_cast_kernel
; INFER: call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
; PTX-LABEL: .visible .entry test_param_to_generic_cast_kernel(
; PTX: prefetch.param.tensormap [%rd{{[0-9]+}}];
entry:
; Generic view of the param-space argument; the expectations above show the
; prefetch being retargeted at the original addrspace(101) pointer instead.
%cast = addrspacecast ptr addrspace(101) %param_ptr to ptr
call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast)
ret void
}

; Kernel Function Test
; Multiple casts in sequence
;
; Starts from the global @constant_tensormap in address space 4 and bounces it
; through generic -> addrspace(4) -> generic before prefetching via the generic
; (.p0) overload. The INFER expectations require the whole cast chain to be
; seen through, leaving a .p4 call on @constant_tensormap itself.
; The PTX expectations differ: they require the address to be materialized,
; converted with cvta.const, and prefetched with the unqualified instruction.
; NOTE(review): presumably the PTX prefix comes from a RUN line that does not
; run the address-space inference step - confirm against the RUN lines, which
; are not visible in this hunk.
define ptx_kernel void @test_infer_through_multiple_casts_kernel() {
; INFER-LABEL: @test_infer_through_multiple_casts_kernel
; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
; PTX-LABEL: .visible .entry test_infer_through_multiple_casts_kernel(
; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
; PTX: prefetch.tensormap [%rd{{[0-9]+}}];
entry:
; Three back-to-back casts: const -> generic -> const -> generic.
%cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
%cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
%cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
ret void
}

declare void @llvm.nvvm.prefetch.tensormap.p0(ptr)
declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4))
declare void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101))


53 changes: 52 additions & 1 deletion llvm/test/CodeGen/NVPTX/prefetch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,55 @@ define void @prefetch_param_tensormap(ptr addrspace(101) %param_ptr) {
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
ret void
}
}

; Kernel variant of the generic tensormap prefetch: a ptx_kernel takes a
; generic pointer argument and passes it straight to the .p0 overload.
; The checks below require a single parameter load followed by the
; unqualified prefetch.tensormap instruction on that register.
define ptx_kernel void @prefetch_generic_tensormap_kernel(ptr %ptr) {
; CHECK-PTX64-LABEL: prefetch_generic_tensormap_kernel(
; CHECK-PTX64: {
; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_generic_tensormap_kernel_param_0];
; CHECK-PTX64-NEXT: prefetch.tensormap [%rd1];
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr)
ret void
}

; Kernel variant of the const-space tensormap prefetch: a ptx_kernel takes an
; addrspace(4) pointer argument and passes it straight to the .p4 overload.
; The checks below require a single parameter load followed by the
; prefetch.const.tensormap instruction on that register.
define ptx_kernel void @prefetch_const_tensormap_kernel(ptr addrspace(4) %const_ptr) {
; CHECK-PTX64-LABEL: prefetch_const_tensormap_kernel(
; CHECK-PTX64: {
; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_const_tensormap_kernel_param_0];
; CHECK-PTX64-NEXT: prefetch.const.tensormap [%rd1];
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr)
ret void
}

; Kernel variant of the param-space tensormap prefetch: a ptx_kernel takes an
; addrspace(101) pointer argument and passes it straight to the .p101 overload.
; The checks below require a single parameter load followed by the
; prefetch.param.tensormap instruction on that register.
define ptx_kernel void @prefetch_param_tensormap_kernel(ptr addrspace(101) %param_ptr) {
; CHECK-PTX64-LABEL: prefetch_param_tensormap_kernel(
; CHECK-PTX64: {
; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_param_tensormap_kernel_param_0];
; CHECK-PTX64-NEXT: prefetch.param.tensormap [%rd1];
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
ret void
}

; Prefetch of a tensormap passed to a ptx_kernel as a 64-byte, 64-byte-aligned
; byval argument carrying the "nvvm.grid_constant" attribute. The argument is
; used through a generic pointer with the .p0 overload.
; The checks below require the parameter symbol's address to be moved into a
; register, converted with cvta.param, and then prefetched with the
; unqualified prefetch.tensormap instruction.
define ptx_kernel void @prefetch_grid_const_tensormap(ptr byval([64 x i8]) align 64 "nvvm.grid_constant" %ptr) {
; CHECK-PTX64-LABEL: .visible .entry prefetch_grid_const_tensormap(
; CHECK-PTX64: mov.b64 %rd{{[0-9]+}}, prefetch_grid_const_tensormap_param_0;
; CHECK-PTX64: cvta.param.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
; CHECK-PTX64: prefetch.tensormap [%rd{{[0-9]+}}];
; CHECK-PTX64: ret;

entry:
; The pointer type is spelled with an explicit addrspace(0), i.e. generic.
call void @llvm.nvvm.prefetch.tensormap.p0(ptr addrspace(0) %ptr)
ret void
}
Loading