From a9c1073c2958139f02fa9b3c639a98a548fd10e2 Mon Sep 17 00:00:00 2001 From: Princeton Ferro Date: Mon, 21 Apr 2025 17:22:54 -0700 Subject: [PATCH] [NVPTX] Add support for PTX ISA v8.8 Support PTX version 8.8 (`-mattr=+ptx88`) from CUDA 12.9. The following new targets are also added for SM103 and SM121: sm_103, sm_103a, sm_121, sm_121a. Also, the existing processor definitions were realigned and a comment describing the arch-specific targets was added. https://docs.nvidia.com/cuda/parallel-thread-execution/#changes-in-ptx-isa-version-8-8 --- llvm/lib/Target/NVPTX/NVPTX.td | 62 +++++++++++++++------------ llvm/test/CodeGen/NVPTX/sm-version.ll | 16 +++++++ 2 files changed, 51 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td index 5467ae011a208..ff9a187ecf723 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.td +++ b/llvm/lib/Target/NVPTX/NVPTX.td @@ -36,17 +36,21 @@ class FeaturePTX: foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53, 60, 61, 62, 70, 72, 75, 80, 86, 87, - 89, 90, 100, 101, 120] in + 89, 90, 100, 101, 103, 120, 121] in def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>; -def SM90a: FeatureSM<"90a", 901>; +// Arch-specific targets. PTX for these is not compatible with any other +// architectures. 
+def SM90a : FeatureSM<"90a", 901>; def SM100a: FeatureSM<"100a", 1001>; def SM101a: FeatureSM<"101a", 1011>; +def SM103a: FeatureSM<"103a", 1031>; def SM120a: FeatureSM<"120a", 1201>; +def SM121a: FeatureSM<"121a", 1211>; foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65, 70, 71, 72, 73, 74, 75, 76, 77, 78, - 80, 81, 82, 83, 84, 85, 86, 87] in + 80, 81, 82, 83, 84, 85, 86, 87, 88] in def PTX#version: FeaturePTX; //===----------------------------------------------------------------------===// @@ -56,33 +60,37 @@ foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65, class Proc Features> : Processor; -def : Proc<"sm_20", [SM20, PTX32]>; -def : Proc<"sm_21", [SM21, PTX32]>; -def : Proc<"sm_30", [SM30]>; -def : Proc<"sm_32", [SM32, PTX40]>; -def : Proc<"sm_35", [SM35, PTX32]>; -def : Proc<"sm_37", [SM37, PTX41]>; -def : Proc<"sm_50", [SM50, PTX40]>; -def : Proc<"sm_52", [SM52, PTX41]>; -def : Proc<"sm_53", [SM53, PTX42]>; -def : Proc<"sm_60", [SM60, PTX50]>; -def : Proc<"sm_61", [SM61, PTX50]>; -def : Proc<"sm_62", [SM62, PTX50]>; -def : Proc<"sm_70", [SM70, PTX60]>; -def : Proc<"sm_72", [SM72, PTX61]>; -def : Proc<"sm_75", [SM75, PTX63]>; -def : Proc<"sm_80", [SM80, PTX70]>; -def : Proc<"sm_86", [SM86, PTX71]>; -def : Proc<"sm_87", [SM87, PTX74]>; -def : Proc<"sm_89", [SM89, PTX78]>; -def : Proc<"sm_90", [SM90, PTX78]>; -def : Proc<"sm_90a", [SM90a, PTX80]>; -def : Proc<"sm_100", [SM100, PTX86]>; +def : Proc<"sm_20", [SM20, PTX32]>; +def : Proc<"sm_21", [SM21, PTX32]>; +def : Proc<"sm_30", [SM30]>; +def : Proc<"sm_32", [SM32, PTX40]>; +def : Proc<"sm_35", [SM35, PTX32]>; +def : Proc<"sm_37", [SM37, PTX41]>; +def : Proc<"sm_50", [SM50, PTX40]>; +def : Proc<"sm_52", [SM52, PTX41]>; +def : Proc<"sm_53", [SM53, PTX42]>; +def : Proc<"sm_60", [SM60, PTX50]>; +def : Proc<"sm_61", [SM61, PTX50]>; +def : Proc<"sm_62", [SM62, PTX50]>; +def : Proc<"sm_70", [SM70, PTX60]>; +def : Proc<"sm_72", [SM72, PTX61]>; +def : Proc<"sm_75", [SM75, PTX63]>; +def 
: Proc<"sm_80", [SM80, PTX70]>; +def : Proc<"sm_86", [SM86, PTX71]>; +def : Proc<"sm_87", [SM87, PTX74]>; +def : Proc<"sm_89", [SM89, PTX78]>; +def : Proc<"sm_90", [SM90, PTX78]>; +def : Proc<"sm_90a", [SM90a, PTX80]>; +def : Proc<"sm_100", [SM100, PTX86]>; def : Proc<"sm_100a", [SM100a, PTX86]>; -def : Proc<"sm_101", [SM101, PTX86]>; +def : Proc<"sm_101", [SM101, PTX86]>; def : Proc<"sm_101a", [SM101a, PTX86]>; -def : Proc<"sm_120", [SM120, PTX87]>; +def : Proc<"sm_103", [SM103, PTX88]>; +def : Proc<"sm_103a", [SM103a, PTX88]>; +def : Proc<"sm_120", [SM120, PTX87]>; def : Proc<"sm_120a", [SM120a, PTX87]>; +def : Proc<"sm_121", [SM121, PTX88]>; +def : Proc<"sm_121a", [SM121a, PTX88]>; def NVPTXInstrInfo : InstrInfo { } diff --git a/llvm/test/CodeGen/NVPTX/sm-version.ll b/llvm/test/CodeGen/NVPTX/sm-version.ll index ce9a1b1b161dc..9705a2f3ba730 100644 --- a/llvm/test/CodeGen/NVPTX/sm-version.ll +++ b/llvm/test/CodeGen/NVPTX/sm-version.ll @@ -20,8 +20,12 @@ ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_100a | FileCheck %s --check-prefix=SM100a ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_101 | FileCheck %s --check-prefix=SM101 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_101a | FileCheck %s --check-prefix=SM101a +; RUN: llc < %s -mtriple=nvptx -mcpu=sm_103 | FileCheck %s --check-prefix=SM103 +; RUN: llc < %s -mtriple=nvptx -mcpu=sm_103a | FileCheck %s --check-prefix=SM103a ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_120 | FileCheck %s --check-prefix=SM120 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_120a | FileCheck %s --check-prefix=SM120a +; RUN: llc < %s -mtriple=nvptx -mcpu=sm_121 | FileCheck %s --check-prefix=SM121 +; RUN: llc < %s -mtriple=nvptx -mcpu=sm_121a | FileCheck %s --check-prefix=SM121a ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=SM20 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_21 | FileCheck %s --check-prefix=SM21 @@ -45,8 +49,12 @@ ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a | FileCheck %s --check-prefix=SM100a ; RUN: llc < %s -mtriple=nvptx64 
-mcpu=sm_101 | FileCheck %s --check-prefix=SM101 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_101a | FileCheck %s --check-prefix=SM101a +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103 | FileCheck %s --check-prefix=SM103 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103a | FileCheck %s --check-prefix=SM103a ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_120 | FileCheck %s --check-prefix=SM120 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_120a | FileCheck %s --check-prefix=SM120a +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_121 | FileCheck %s --check-prefix=SM121 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_121a | FileCheck %s --check-prefix=SM121a ; SM20: .version 3.2 ; SM21: .version 3.2 @@ -70,8 +78,12 @@ ; SM100a: .version 8.6 ; SM101: .version 8.6 ; SM101a: .version 8.6 +; SM103: .version 8.8 +; SM103a: .version 8.8 ; SM120: .version 8.7 ; SM120a: .version 8.7 +; SM121: .version 8.8 +; SM121a: .version 8.8 ; SM20: .target sm_20 ; SM21: .target sm_21 @@ -95,5 +107,9 @@ ; SM100a: .target sm_100a ; SM101: .target sm_101 ; SM101a: .target sm_101a +; SM103: .target sm_103 +; SM103a: .target sm_103a ; SM120: .target sm_120 ; SM120a: .target sm_120a +; SM121: .target sm_121 +; SM121a: .target sm_121a