From 85571ba7b46f5dbd3bebcd3f99b96c11ebe461dc Mon Sep 17 00:00:00 2001 From: Justin Fargnoli Date: Thu, 30 Jan 2025 15:14:18 -0800 Subject: [PATCH 1/7] griddepcontrol requires sm_90 --- llvm/test/CodeGen/NVPTX/griddepcontrol.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/NVPTX/griddepcontrol.ll b/llvm/test/CodeGen/NVPTX/griddepcontrol.ll index fe15b3fe4afbd..0bf9196aa2902 100644 --- a/llvm/test/CodeGen/NVPTX/griddepcontrol.ll +++ b/llvm/test/CodeGen/NVPTX/griddepcontrol.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mcpu=sm_90 -march=nvptx64 | FileCheck %s -; RUN: %if ptxas-11.8 %{ llc < %s -mcpu=sm_90 -march=nvptx64 | %ptxas-verify %} +; RUN: %if ptxas-11.8 %{ llc < %s -mcpu=sm_90 -march=nvptx64 | %ptxas-verify -arch=sm_90 %} define void @griddepcontrol() { ; CHECK-LABEL: griddepcontrol( From 492bb835a4d96f8b77f4d080ad9f9adcaf8a0f15 Mon Sep 17 00:00:00 2001 From: Justin Fargnoli Date: Thu, 30 Jan 2025 15:17:24 -0800 Subject: [PATCH 2/7] constant pointers not supported in Separate compilation --- llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll index 2889d2d89a857..f324998d9b33d 100644 --- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll +++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas -o %t %} %struct.Large = type { [16 x double] } From 2ef92969885fd5be86f69f3bcef0ff0abd562569 Mon Sep 17 00:00:00 2001 From: Justin Fargnoli Date: Thu, 30 Jan 2025 15:43:19 -0800 Subject: [PATCH 3/7] constant pointers not supported in Separate compilation - lower-args.ll --- llvm/test/CodeGen/NVPTX/lower-args.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll index 269bba75dc5fb..0deff5afd9310 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes COMMON,IR,IRO ; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC ; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO -; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 | %ptxas -o %t %} target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" From 77f3afafb8d9b6085ee79b2fc81b30111256efd4 Mon Sep 17 00:00:00 2001 From: Justin Fargnoli Date: Thu, 30 Jan 2025 16:27:03 -0800 Subject: [PATCH 4/7] constant pointers not supported in Separate compilation --- llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 2 +- llvm/test/CodeGen/NVPTX/lower-args.ll | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll index f324998d9b33d..a56b85de80143 100644 --- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll +++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas -o %t %} +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas -arch=sm_60 - %} %struct.Large = type { [16 x double] } diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll index 0deff5afd9310..925497748348c 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args.ll @@ -1,8 +1,8 @@ ; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,IR,IRC ; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes COMMON,IR,IRO -; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC -; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO -; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 | %ptxas -o %t %} +; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC +; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO +; RUN: %if ptxas %{ llc < %s -mcpu=sm_52 | %ptxas -arch=sm_52 - %} target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" @@ -145,4 +145,6 @@ define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%st ; Function Attrs: convergent nounwind -declare dso_local ptr @escape(ptr) local_unnamed_addr +define dso_local ptr @escape(ptr) local_unnamed_addr { + ret ptr %0 +} From ce4cf633a565d63e3a2a3c79b7a7077fb0ee1f72 Mon Sep 17 00:00:00 2001 From: Justin Fargnoli Date: Fri, 31 Jan 2025 18:39:38 -0800 Subject: [PATCH 5/7] Use non-const addrspace --- llvm/test/CodeGen/NVPTX/lower-args.ll | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll index 925497748348c..2c8496817e9d2 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes COMMON,IR,IRO ; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC ; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO -; RUN: %if ptxas %{ llc < %s -mcpu=sm_52 | %ptxas -arch=sm_52 - %} +; RUN: %if ptxas %{ llc < %s -mcpu=sm_52 | %ptxas-verify %} target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" @@ -87,12 +87,12 @@ define ptx_kernel void @ptr_generic(ptr %out, ptr %in) { } ; COMMON-LABEL: ptr_nongeneric -define ptx_kernel void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(4) %in) { +define ptx_kernel void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(3) %in) { ; IR-NOT: addrspacecast ; PTX-NOT: cvta.to.global -; PTX: ld.const.u32 +; PTX: ld.shared.u32 ; PTX st.global.u32 - %v = load i32, ptr addrspace(4) %in, align 4 + %v = load i32, ptr addrspace(3) %in, align 4 store i32 %v, ptr addrspace(1) %out, align 4 ret void } @@ -145,6 +145,4 @@ define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%st ; Function Attrs: convergent nounwind -define dso_local ptr @escape(ptr) local_unnamed_addr { - ret ptr %0 -} +declare dso_local ptr @escape(ptr) local_unnamed_addr \ No newline at end of file From f936cb6ccb97a56fc29a5ffa52e9251d6b7294f5 Mon Sep 17 00:00:00 2001 From: Justin Fargnoli Date: Fri, 31 Jan 2025 18:40:46 -0800 Subject: [PATCH 6/7] Add EOF newline --- llvm/test/CodeGen/NVPTX/lower-args.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll index 2c8496817e9d2..cb3799d5e9e67 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args.ll @@ -145,4 +145,4 @@ define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%st ; Function Attrs: convergent nounwind -declare dso_local ptr @escape(ptr) local_unnamed_addr \ No newline at end of file +declare dso_local ptr @escape(ptr) local_unnamed_addr From e04a6bfe95a787d16d419cb4c24606f1dd8a108b Mon Sep 17 00:00:00 2001 From: Justin Fargnoli Date: Fri, 31 Jan 2025 18:42:00 -0800 Subject: [PATCH 7/7] Undo sm change --- llvm/test/CodeGen/NVPTX/lower-args.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll index cb3799d5e9e67..81b86c86d40de 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args.ll @@ -1,8 +1,8 @@ ; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,IR,IRC ; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes COMMON,IR,IRO -; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC -; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO -; RUN: %if ptxas %{ llc < %s -mcpu=sm_52 | %ptxas-verify %} +; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC +; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO +; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda"