22; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes COMMON,IR,IRO
33; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC
44; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO
5- ; RUN: %if ptxas %{ llc < %s -mcpu=sm_52 | %ptxas -arch=sm_52 - %}
5+ ; RUN: %if ptxas %{ llc < %s -mcpu=sm_52 | %ptxas-verify %}
66
77target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
88target triple = "nvptx64-nvidia-cuda"
@@ -87,12 +87,12 @@ define ptx_kernel void @ptr_generic(ptr %out, ptr %in) {
8787}
8888
8989; COMMON-LABEL: ptr_nongeneric
90- define ptx_kernel void @ptr_nongeneric (ptr addrspace (1 ) %out , ptr addrspace (4 ) %in ) {
90+ define ptx_kernel void @ptr_nongeneric (ptr addrspace (1 ) %out , ptr addrspace (3 ) %in ) {
9191; IR-NOT: addrspacecast
9292; PTX-NOT: cvta.to.global
93- ; PTX: ld.const .u32
93+ ; PTX: ld.shared .u32
9494; PTX: st.global.u32
95- %v = load i32 , ptr addrspace (4 ) %in , align 4
95+ %v = load i32 , ptr addrspace (3 ) %in , align 4
9696 store i32 %v , ptr addrspace (1 ) %out , align 4
9797 ret void
9898}
@@ -145,6 +145,4 @@ define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%st
145145
146146
147147; Function Attrs: convergent nounwind
148- define dso_local ptr @escape (ptr ) local_unnamed_addr {
149- ret ptr %0
150- }
148+ declare dso_local ptr @escape (ptr ) local_unnamed_addr
0 commit comments