Skip to content

Commit 885da07

Browse files
authored
[NVPTX] Update architecture support checks for tcgen05 intrinsics (llvm#161519)
This change updates the architecture support checks for the tcgen05 intrinsics (except tcgen05.mma.*). The new checks also cover family-specific architecture variants. After this change, the architecture checks are accurate and match the PTX ISA.

Intrinsics affected:
- tcgen05.ld/st
- tcgen05.alloc/dealloc/relinquish
- tcgen05.cp
- tcgen05.fence/wait
- tcgen05.commit
- tcgen05.shift
1 parent 0dd51f9 commit 885da07

File tree

12 files changed

+136
-16
lines changed

12 files changed

+136
-16
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,10 @@ static unsigned getTcgen05LdOpcode(unsigned IID, bool enablePack) {
280280
}
281281

282282
void NVPTXDAGToDAGISel::SelectTcgen05Ld(SDNode *N, bool hasOffset) {
283+
if (!Subtarget->hasTcgen05InstSupport())
284+
report_fatal_error(
285+
"tcgen05.ld is not supported on this architecture variant");
286+
283287
SDLoc DL(N);
284288
unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
285289

@@ -2136,6 +2140,10 @@ static unsigned getTcgen05StOpcode(unsigned IID, bool enableUnpack) {
21362140
}
21372141

21382142
void NVPTXDAGToDAGISel::SelectTcgen05St(SDNode *N, bool hasOffset) {
2143+
if (!Subtarget->hasTcgen05InstSupport())
2144+
report_fatal_error(
2145+
"tcgen05.st is not supported on this architecture variant");
2146+
21392147
SDLoc DL(N);
21402148
unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
21412149

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,22 @@ def PrmtMode : Operand<i32> {
101101
// NVPTX Instruction Predicate Definitions
102102
//===----------------------------------------------------------------------===//
103103

104+
// Checks PTX version and family-specific and architecture-specific SM versions.
105+
// For example, sm_100{f/a} and any future variants in the same family will match
106+
// for any PTX version greater than or equal to `PTXVersion`.
107+
class PTXWithFamilySMs<int PTXVersion, list<int> SMVersions> :
108+
Predicate<"Subtarget->hasPTXWithFamilySMs(" # PTXVersion # ", {" #
109+
!interleave(SMVersions, ", ") # "})">;
110+
111+
// Checks PTX version and architecture-specific SM versions.
112+
// For example, sm_100{a} will match for any PTX version
113+
// greater than or equal to `PTXVersion`.
114+
class PTXWithAccelSMs<int PTXVersion, list<int> SMVersions> :
115+
Predicate<"Subtarget->hasPTXWithAccelSMs(" # PTXVersion # ", {" #
116+
!interleave(SMVersions, ", ") # "})">;
117+
118+
// Helper predicate to call a subtarget method.
119+
class callSubtarget<string SubtargetMethod> : Predicate<"Subtarget->" # SubtargetMethod # "()">;
104120

105121
def hasAtomAddF64 : Predicate<"Subtarget->hasAtomAddF64()">;
106122
def hasAtomScope : Predicate<"Subtarget->hasAtomScope()">;

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5103,8 +5103,8 @@ let Predicates = [hasSM<90>, hasPTX<78>] in {
51035103
def EXIT : NullaryInst<"exit", int_nvvm_exit>;
51045104

51055105
// Tcgen05 intrinsics
5106-
let isConvergent = true, Predicates = [hasTcgen05Instructions] in {
5107-
5106+
let isConvergent = true in {
5107+
let Predicates = [callSubtarget<"hasTcgen05InstSupport">] in {
51085108
multiclass TCGEN05_ALLOC_INTR<string AS, string num, Intrinsic Intr> {
51095109
def "" : BasicNVPTXInst<(outs),
51105110
(ins ADDR:$dst, B32:$ncols),
@@ -5156,15 +5156,6 @@ defm TCGEN05_COMMIT_CG2 : TCGEN05_COMMIT_INTR<"", "2">;
51565156
defm TCGEN05_COMMIT_S64_CG1 : TCGEN05_COMMIT_INTR<"shared", "1">;
51575157
defm TCGEN05_COMMIT_S64_CG2 : TCGEN05_COMMIT_INTR<"shared", "2">;
51585158

5159-
multiclass TCGEN05_SHIFT_INTR<string num, Intrinsic Intr> {
5160-
def "" : BasicNVPTXInst<(outs),
5161-
(ins ADDR:$tmem_addr),
5162-
"tcgen05.shift.cta_group::" # num # ".down",
5163-
[(Intr addr:$tmem_addr)]>;
5164-
}
5165-
defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>;
5166-
defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>;
5167-
51685159
multiclass TCGEN05_CP_INTR<string shape, string src_fmt, string mc = ""> {
51695160
defvar dst_fmt = !if(!eq(src_fmt, ""), "", ".b8x16");
51705161
defvar fmt_asm = StrJoin<".", [dst_fmt, src_fmt]>.ret;
@@ -5195,9 +5186,22 @@ foreach src_fmt = ["", "b6x16_p32", "b4x16_p64"] in {
51955186
defm TCGEN05_CP_64x128_2 # src_fmt : TCGEN05_CP_INTR<"64x128b", src_fmt, "warpx2::01_23">;
51965187
defm TCGEN05_CP_32x128 # src_fmt : TCGEN05_CP_INTR<"32x128b", src_fmt, "warpx4">;
51975188
}
5189+
} // Predicates
5190+
5191+
let Predicates = [callSubtarget<"hasTcgen05ShiftSupport">] in {
5192+
multiclass TCGEN05_SHIFT_INTR<string num, Intrinsic Intr> {
5193+
def "" : BasicNVPTXInst<(outs),
5194+
(ins ADDR:$tmem_addr),
5195+
"tcgen05.shift.cta_group::" # num # ".down",
5196+
[(Intr addr:$tmem_addr)]>;
5197+
}
5198+
defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>;
5199+
defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>;
5200+
} // Predicates
5201+
51985202
} // isConvergent
51995203

5200-
let hasSideEffects = 1, Predicates = [hasTcgen05Instructions] in {
5204+
let hasSideEffects = 1, Predicates = [callSubtarget<"hasTcgen05InstSupport">] in {
52015205

52025206
def tcgen05_fence_before_thread_sync: NullaryInst<
52035207
"tcgen05.fence::before_thread_sync", int_nvvm_tcgen05_fence_before_thread_sync>;
@@ -5231,8 +5235,7 @@ class TCGEN05_LDST_REGINFO<int Veclen> {
52315235
//
52325236

52335237
class TCGEN05_LD_INST<string Shape, int Num, bit Pack> :
5234-
NVPTXInst<(outs), (ins), "?", []>,
5235-
Requires<[hasTcgen05Instructions]> {
5238+
NVPTXInst<(outs), (ins), "?", []> {
52365239

52375240
TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO<
52385241
NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>;
@@ -5256,8 +5259,7 @@ class TCGEN05_LD_INST<string Shape, int Num, bit Pack> :
52565259
//
52575260

52585261
class TCGEN05_ST_INST<string Shape, int Num, bit Unpack> :
5259-
NVPTXInst<(outs), (ins), "?", []>,
5260-
Requires<[hasTcgen05Instructions]> {
5262+
NVPTXInst<(outs), (ins), "?", []> {
52615263

52625264
TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO<
52635265
NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>;

llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,40 @@ const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const {
7272
return TSInfo.get();
7373
}
7474

75+
bool NVPTXSubtarget::hasPTXWithFamilySMs(unsigned PTXVersion,
76+
ArrayRef<unsigned> SMVersions) const {
77+
unsigned PTXVer = getPTXVersion();
78+
if (!hasFamilySpecificFeatures() || PTXVer < PTXVersion)
79+
return false;
80+
81+
unsigned SMVer = getSmVersion();
82+
return llvm::any_of(SMVersions, [&](unsigned SM) {
83+
// sm_101 is a different family, never group it with sm_10x.
84+
if (SMVer == 101 || SM == 101)
85+
return SMVer == SM &&
86+
// PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not
87+
// supported.
88+
!(PTXVer >= 90 && SMVer == 101);
89+
90+
return getSmFamilyVersion() == SM / 10 && SMVer >= SM;
91+
});
92+
}
93+
94+
bool NVPTXSubtarget::hasPTXWithAccelSMs(unsigned PTXVersion,
95+
ArrayRef<unsigned> SMVersions) const {
96+
unsigned PTXVer = getPTXVersion();
97+
if (!hasArchAccelFeatures() || PTXVer < PTXVersion)
98+
return false;
99+
100+
unsigned SMVer = getSmVersion();
101+
return llvm::any_of(SMVersions, [&](unsigned SM) {
102+
return SMVer == SM &&
103+
// PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not
104+
// supported.
105+
!(PTXVer >= 90 && SMVer == 101);
106+
});
107+
}
108+
75109
bool NVPTXSubtarget::allowFP16Math() const {
76110
return hasFP16Math() && NoF16Math == false;
77111
}

llvm/lib/Target/NVPTX/NVPTXSubtarget.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,18 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
7373

7474
const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
7575

76+
// Checks PTX version and family-specific and architecture-specific SM
77+
// versions. For example, sm_100{f/a} and any future variants in the same
78+
// family will match for any PTX version greater than or equal to
79+
// `PTXVersion`.
80+
bool hasPTXWithFamilySMs(unsigned PTXVersion,
81+
ArrayRef<unsigned> SMVersions) const;
82+
// Checks PTX version and architecture-specific SM versions.
83+
// For example, sm_100{a} will match for any PTX version greater than or equal
84+
// to `PTXVersion`.
85+
bool hasPTXWithAccelSMs(unsigned PTXVersion,
86+
ArrayRef<unsigned> SMVersions) const;
87+
7688
bool has256BitVectorLoadStore(unsigned AS) const {
7789
return SmVersion >= 100 && PTXVersion >= 88 &&
7890
AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
@@ -127,6 +139,27 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
127139
return HasTcgen05 && PTXVersion >= MinPTXVersion;
128140
}
129141

142+
// Checks following instructions support:
143+
// - tcgen05.ld/st
144+
// - tcgen05.alloc/dealloc/relinquish
145+
// - tcgen05.cp
146+
// - tcgen05.fence/wait
147+
// - tcgen05.commit
148+
bool hasTcgen05InstSupport() const {
149+
// sm_101 renamed to sm_110 in PTX 9.0
150+
return hasPTXWithFamilySMs(90, {100, 110}) ||
151+
hasPTXWithFamilySMs(88, {100, 101}) ||
152+
hasPTXWithAccelSMs(86, {100, 101});
153+
}
154+
155+
// Checks tcgen05.shift instruction support.
156+
bool hasTcgen05ShiftSupport() const {
157+
// sm_101 renamed to sm_110 in PTX 9.0
158+
return hasPTXWithAccelSMs(90, {100, 110, 103}) ||
159+
hasPTXWithAccelSMs(88, {100, 101, 103}) ||
160+
hasPTXWithAccelSMs(86, {100, 101});
161+
}
162+
130163
bool hasTcgen05MMAScaleInputDImm() const {
131164
return FullSmVersion == 1003 && PTXVersion >= 86;
132165
}
@@ -158,6 +191,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
158191
bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
159192
unsigned int getFullSmVersion() const { return FullSmVersion; }
160193
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
194+
// Full SM version with its two lowest decimal digits dropped (integer
// division by 100), i.e. one digit fewer than getSmVersion() keeps.
unsigned int getSmFamilyVersion() const { return getFullSmVersion() / 100; }
161195
// GPUs with "a" suffix have architecture-accelerated features that are
162196
// supported on the specified architecture only, hence such targets do not
163197
// follow the onion layer model. hasArchAccelFeatures() allows distinguishing

llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@
22
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK_PTX64 %s
33
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr | FileCheck --check-prefixes=CHECK_PTX64_SHARED32 %s
44
; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK_PTX64 %s
5+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK_PTX64 %s
6+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK_PTX64 %s
57
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %}
68
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr | %ptxas-verify -arch=sm_100a %}
79
; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %}
10+
; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
11+
; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}
812

913

1014
declare void @llvm.nvvm.tcgen05.alloc.cg1(ptr %addr, i32 %ncols)

llvm/test/CodeGen/NVPTX/tcgen05-commit.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@
22
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK_PTX64 %s
33
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr | FileCheck --check-prefixes=CHECK_PTX64_SHARED32 %s
44
; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK_PTX64 %s
5+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK_PTX64 %s
6+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK_PTX64 %s
57
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %}
68
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr | %ptxas-verify -arch=sm_100a %}
79
; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %}
10+
; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
11+
; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}
812

913
declare void @llvm.nvvm.tcgen05.commit.cg1(ptr %bar_addr)
1014
declare void @llvm.nvvm.tcgen05.commit.cg2(ptr %bar_addr)

llvm/test/CodeGen/NVPTX/tcgen05-cp.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK %s
33
; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s
4+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s
5+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK %s
46
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %}
57
; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %}
8+
; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
9+
; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}
610

711
define void @test_tcgen05_cp_64x128_v1_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
812
; CHECK-LABEL: test_tcgen05_cp_64x128_v1_cg1(

llvm/test/CodeGen/NVPTX/tcgen05-fence.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK %s
33
; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s
4+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s
5+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK %s
46
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %}
57
; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %}
8+
; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
9+
; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}
610

711
declare void @llvm.nvvm.tcgen05.fence.before.thread.sync()
812
declare void @llvm.nvvm.tcgen05.fence.after.thread.sync()

llvm/test/CodeGen/NVPTX/tcgen05-ld.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@
22
; RUN: llc < %s -o - -mcpu=sm_100a -march=nvptx64 -mattr=+ptx86 | FileCheck %s
33
; RUN: llc < %s -o - -mcpu=sm_101a -march=nvptx64 -mattr=+ptx86 | FileCheck %s
44
; RUN: llc < %s -o - -mcpu=sm_103a -march=nvptx64 -mattr=+ptx88 | FileCheck %s
5+
; RUN: llc < %s -o - -mcpu=sm_100f -march=nvptx64 -mattr=+ptx88 | FileCheck %s
6+
; RUN: llc < %s -o - -mcpu=sm_110f -march=nvptx64 -mattr=+ptx90 | FileCheck %s
57
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mattr=+ptx86 -mcpu=sm_100a | %ptxas-verify -arch=sm_100a %}
68
; RUN: %if ptxas-sm_101a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mattr=+ptx86 -mcpu=sm_101a | %ptxas-verify -arch=sm_101a %}
79
; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mattr=+ptx88 -mcpu=sm_103a | %ptxas-verify -arch=sm_103a %}
10+
; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mattr=+ptx88 -mcpu=sm_100f | %ptxas-verify -arch=sm_100f %}
11+
; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mattr=+ptx90 -mcpu=sm_110f | %ptxas-verify -arch=sm_110f %}
812

913
; CHECK-LABEL: nvvm_tcgen05_ld_16x64b
1014
define void @nvvm_tcgen05_ld_16x64b(ptr addrspace(6) %taddr) {

0 commit comments

Comments
 (0)