Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,8 +590,12 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
}
case Intrinsic::nvvm_prefetch_tensormap: {
IRBuilder<> Builder(II);
return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
NewV);
const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
if (NewAS == NVPTXAS::ADDRESS_SPACE_CONST ||
NewAS == NVPTXAS::ADDRESS_SPACE_PARAM)
return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
NewV);
return nullptr;
}
}
return nullptr;
Expand Down
45 changes: 43 additions & 2 deletions llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
; RUN: opt < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix=INFER
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | FileCheck %s --check-prefix=PTX
; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | %ptxas-verify -arch=sm_90 %}
; RUN: not llc -march=nvptx64 %s -o - 2>&1 | FileCheck %s --check-prefix=ERR
; XFAIL: *

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
target triple = "nvptx64-unknown-unknown"
Expand All @@ -11,7 +13,6 @@ target triple = "nvptx64-unknown-unknown"
define void @test_infer_const_from_cast() {
; INFER-LABEL: @test_infer_const_from_cast
; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
; BOTH: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
; PTX-LABEL: .visible .func test_infer_const_from_cast(
; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
Expand Down Expand Up @@ -69,7 +70,47 @@ entry:
%cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
%cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
%cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
call void @llvm.nvvm.prefetch.tensormap(ptr %cast3)
call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
ret void
}

; Kernel Function Test
; Cast from Const space to Generic
define ptx_kernel void @test_const_to_generic_cast_kernel(ptr addrspace(4) %const_ptr) {
; INFER-LABEL: @test_const_to_generic_cast_kernel
; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr)
; PTX-LABEL: .visible .entry test_const_to_generic_cast_kernel(
; PTX: prefetch.const.tensormap [%rd{{[0-9]+}}];
entry:
%cast = addrspacecast ptr addrspace(4) %const_ptr to ptr
call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast)
ret void
}

; Kernel Function Test
; Multiple casts in sequence
define ptx_kernel void @test_infer_through_multiple_casts_kernel() {
; INFER-LABEL: @test_infer_through_multiple_casts_kernel
; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
; PTX-LABEL: .visible .entry test_infer_through_multiple_casts_kernel(
; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
; PTX: prefetch.tensormap [%rd{{[0-9]+}}];
entry:
%cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
%cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
%cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
ret void
}


; Negative test case for global to generic addrspace cast
define void @test_global_to_generic_cast(ptr addrspace(1) %global_ptr) {
; ERR: unsupported prefetch address space cast
entry:
%cast = addrspacecast ptr addrspace(1) %global_ptr to ptr
call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast)
ret void
}

Expand Down
43 changes: 42 additions & 1 deletion llvm/test/CodeGen/NVPTX/prefetch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,45 @@ define void @prefetch_param_tensormap(ptr addrspace(101) %param_ptr) {
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
ret void
}
}

define ptx_kernel void @prefetch_tensormap_kernel(ptr %ptr) {
; CHECK-PTX64-LABEL: prefetch_tensormap_kernel(
; CHECK-PTX64: {
; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_tensormap_kernel_param_0];
; CHECK-PTX64-NEXT: prefetch.tensormap [%rd1];
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr)
ret void
}

define ptx_kernel void @prefetch_const_tensormap_kernel(ptr addrspace(4) %const_ptr) {
; CHECK-PTX64-LABEL: prefetch_const_tensormap_kernel(
; CHECK-PTX64: {
; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_const_tensormap_kernel_param_0];
; CHECK-PTX64-NEXT: prefetch.const.tensormap [%rd1];
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr)
ret void
}

define ptx_kernel void @prefetch_param_tensormap_kernel(ptr addrspace(101) %param_ptr) {
; CHECK-PTX64-LABEL: prefetch_param_tensormap_kernel(
; CHECK-PTX64: {
; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK-PTX64-EMPTY:
; CHECK-PTX64-NEXT: // %bb.0:
; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_param_tensormap_kernel_param_0];
; CHECK-PTX64-NEXT: prefetch.param.tensormap [%rd1];
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
ret void
}