Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,10 @@ static unsigned getTcgen05LdOpcode(unsigned IID, bool enablePack) {
}

void NVPTXDAGToDAGISel::SelectTcgen05Ld(SDNode *N, bool hasOffset) {
if (!Subtarget->hasTcgen05InstSupport())
report_fatal_error(
"tcgen05.ld is not supported on this architecture variant");

SDLoc DL(N);
unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();

Expand Down Expand Up @@ -2136,6 +2140,10 @@ static unsigned getTcgen05StOpcode(unsigned IID, bool enableUnpack) {
}

void NVPTXDAGToDAGISel::SelectTcgen05St(SDNode *N, bool hasOffset) {
if (!Subtarget->hasTcgen05InstSupport())
report_fatal_error(
"tcgen05.st is not supported on this architecture variant");

SDLoc DL(N);
unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();

Expand Down
16 changes: 16 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,22 @@ def PrmtMode : Operand<i32> {
// NVPTX Instruction Predicate Definitions
//===----------------------------------------------------------------------===//

// Checks PTX version and family-specific and architecture-specific SM versions.
// For example, sm_100{f/a} and any future variants in the same family will match
// for any PTX version greater than or equal to `PTXVersion`.
class PTXWithFamilySMs<int PTXVersion, list<int> SMVersions> :
Predicate<"Subtarget->hasPTXWithFamilySMs(" # PTXVersion # ", {" #
!interleave(SMVersions, ", ") # "})">;

// Checks PTX version and architecture-specific SM versions.
// For example, sm_100{a} will match for any PTX version
// greater than or equal to `PTXVersion`.
class PTXWithAccelSMs<int PTXVersion, list<int> SMVersions> :
Predicate<"Subtarget->hasPTXWithAccelSMs(" # PTXVersion # ", {" #
!interleave(SMVersions, ", ") # "})">;

// Helper predicate to call a subtarget method.
class callSubtarget<string SubtargetMethod> : Predicate<"Subtarget->" # SubtargetMethod # "()">;

def hasAtomAddF64 : Predicate<"Subtarget->hasAtomAddF64()">;
def hasAtomScope : Predicate<"Subtarget->hasAtomScope()">;
Expand Down
34 changes: 18 additions & 16 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -5103,8 +5103,8 @@ let Predicates = [hasSM<90>, hasPTX<78>] in {
def EXIT : NullaryInst<"exit", int_nvvm_exit>;

// Tcgen05 intrinsics
let isConvergent = true, Predicates = [hasTcgen05Instructions] in {

let isConvergent = true in {
let Predicates = [callSubtarget<"hasTcgen05InstSupport">] in {
multiclass TCGEN05_ALLOC_INTR<string AS, string num, Intrinsic Intr> {
def "" : BasicNVPTXInst<(outs),
(ins ADDR:$dst, B32:$ncols),
Expand Down Expand Up @@ -5156,15 +5156,6 @@ defm TCGEN05_COMMIT_CG2 : TCGEN05_COMMIT_INTR<"", "2">;
defm TCGEN05_COMMIT_S64_CG1 : TCGEN05_COMMIT_INTR<"shared", "1">;
defm TCGEN05_COMMIT_S64_CG2 : TCGEN05_COMMIT_INTR<"shared", "2">;

multiclass TCGEN05_SHIFT_INTR<string num, Intrinsic Intr> {
def "" : BasicNVPTXInst<(outs),
(ins ADDR:$tmem_addr),
"tcgen05.shift.cta_group::" # num # ".down",
[(Intr addr:$tmem_addr)]>;
}
defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>;
defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>;

multiclass TCGEN05_CP_INTR<string shape, string src_fmt, string mc = ""> {
defvar dst_fmt = !if(!eq(src_fmt, ""), "", ".b8x16");
defvar fmt_asm = StrJoin<".", [dst_fmt, src_fmt]>.ret;
Expand Down Expand Up @@ -5195,9 +5186,22 @@ foreach src_fmt = ["", "b6x16_p32", "b4x16_p64"] in {
defm TCGEN05_CP_64x128_2 # src_fmt : TCGEN05_CP_INTR<"64x128b", src_fmt, "warpx2::01_23">;
defm TCGEN05_CP_32x128 # src_fmt : TCGEN05_CP_INTR<"32x128b", src_fmt, "warpx4">;
}
} // Predicates

let Predicates = [callSubtarget<"hasTcgen05ShiftSupport">] in {
multiclass TCGEN05_SHIFT_INTR<string num, Intrinsic Intr> {
def "" : BasicNVPTXInst<(outs),
(ins ADDR:$tmem_addr),
"tcgen05.shift.cta_group::" # num # ".down",
[(Intr addr:$tmem_addr)]>;
}
defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>;
defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>;
} // Predicates

} // isConvergent

let hasSideEffects = 1, Predicates = [hasTcgen05Instructions] in {
let hasSideEffects = 1, Predicates = [callSubtarget<"hasTcgen05InstSupport">] in {

def tcgen05_fence_before_thread_sync: NullaryInst<
"tcgen05.fence::before_thread_sync", int_nvvm_tcgen05_fence_before_thread_sync>;
Expand Down Expand Up @@ -5231,8 +5235,7 @@ class TCGEN05_LDST_REGINFO<int Veclen> {
//

class TCGEN05_LD_INST<string Shape, int Num, bit Pack> :
NVPTXInst<(outs), (ins), "?", []>,
Requires<[hasTcgen05Instructions]> {
NVPTXInst<(outs), (ins), "?", []> {

TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO<
NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>;
Expand All @@ -5256,8 +5259,7 @@ class TCGEN05_LD_INST<string Shape, int Num, bit Pack> :
//

class TCGEN05_ST_INST<string Shape, int Num, bit Unpack> :
NVPTXInst<(outs), (ins), "?", []>,
Requires<[hasTcgen05Instructions]> {
NVPTXInst<(outs), (ins), "?", []> {

TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO<
NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>;
Expand Down
34 changes: 34 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,40 @@ const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const {
return TSInfo.get();
}

bool NVPTXSubtarget::hasPTXWithFamilySMs(unsigned PTXVersion,
ArrayRef<unsigned> SMVersions) const {
unsigned PTXVer = getPTXVersion();
if (!hasFamilySpecificFeatures() || PTXVer < PTXVersion)
return false;

unsigned SMVer = getSmVersion();
return llvm::any_of(SMVersions, [&](unsigned SM) {
// sm_101 is a different family, never group it with sm_10x.
if (SMVer == 101 || SM == 101)
return SMVer == SM &&
// PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not
// supported.
!(PTXVer >= 90 && SMVer == 101);

return getSmFamilyVersion() == SM / 10 && SMVer >= SM;
});
}

bool NVPTXSubtarget::hasPTXWithAccelSMs(unsigned PTXVersion,
ArrayRef<unsigned> SMVersions) const {
unsigned PTXVer = getPTXVersion();
if (!hasArchAccelFeatures() || PTXVer < PTXVersion)
return false;

unsigned SMVer = getSmVersion();
return llvm::any_of(SMVersions, [&](unsigned SM) {
return SMVer == SM &&
// PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not
// supported.
!(PTXVer >= 90 && SMVer == 101);
});
}

bool NVPTXSubtarget::allowFP16Math() const {
return hasFP16Math() && NoF16Math == false;
}
Expand Down
34 changes: 34 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {

const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;

// Checks PTX version and family-specific and architecture-specific SM
// versions. For example, sm_100{f/a} and any future variants in the same
// family will match for any PTX version greater than or equal to
// `PTXVersion`.
bool hasPTXWithFamilySMs(unsigned PTXVersion,
ArrayRef<unsigned> SMVersions) const;
// Checks PTX version and architecture-specific SM versions.
// For example, sm_100{a} will match for any PTX version greater than or equal
// to `PTXVersion`.
bool hasPTXWithAccelSMs(unsigned PTXVersion,
ArrayRef<unsigned> SMVersions) const;

bool has256BitVectorLoadStore(unsigned AS) const {
return SmVersion >= 100 && PTXVersion >= 88 &&
AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
Expand Down Expand Up @@ -127,6 +139,27 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
return HasTcgen05 && PTXVersion >= MinPTXVersion;
}

// Checks following instructions support:
// - tcgen05.ld/st
// - tcgen05.alloc/dealloc/relinquish
// - tcgen05.cp
// - tcgen05.fence/wait
// - tcgen05.commit
bool hasTcgen05InstSupport() const {
// sm_101 renamed to sm_110 in PTX 9.0
return hasPTXWithFamilySMs(90, {100, 110}) ||
hasPTXWithFamilySMs(88, {100, 101}) ||
hasPTXWithAccelSMs(86, {100, 101});
}

// Checks tcgen05.shift instruction support.
bool hasTcgen05ShiftSupport() const {
// sm_101 renamed to sm_110 in PTX 9.0
return hasPTXWithAccelSMs(90, {100, 110, 103}) ||
hasPTXWithAccelSMs(88, {100, 101, 103}) ||
hasPTXWithAccelSMs(86, {100, 101});
}

bool hasTcgen05MMAScaleInputDImm() const {
return FullSmVersion == 1003 && PTXVersion >= 86;
}
Expand Down Expand Up @@ -158,6 +191,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
unsigned int getFullSmVersion() const { return FullSmVersion; }
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
unsigned int getSmFamilyVersion() const { return getFullSmVersion() / 100; }
// GPUs with "a" suffix have architecture-accelerated features that are
// supported on the specified architecture only, hence such targets do not
// follow the onion layer model. hasArchAccelFeatures() allows distinguishing
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK_PTX64 %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr | FileCheck --check-prefixes=CHECK_PTX64_SHARED32 %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK_PTX64 %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK_PTX64 %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK_PTX64 %s
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr | %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %}
; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}


declare void @llvm.nvvm.tcgen05.alloc.cg1(ptr %addr, i32 %ncols)
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/NVPTX/tcgen05-commit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK_PTX64 %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr | FileCheck --check-prefixes=CHECK_PTX64_SHARED32 %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK_PTX64 %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK_PTX64 %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK_PTX64 %s
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 --nvptx-short-ptr | %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %}
; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}

declare void @llvm.nvvm.tcgen05.commit.cg1(ptr %bar_addr)
declare void @llvm.nvvm.tcgen05.commit.cg2(ptr %bar_addr)
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/NVPTX/tcgen05-cp.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK %s
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %}
; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}

define void @test_tcgen05_cp_64x128_v1_cg1(ptr addrspace(6) %addr, i64 %sdesc) {
; CHECK-LABEL: test_tcgen05_cp_64x128_v1_cg1(
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/NVPTX/tcgen05-fence.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | FileCheck --check-prefixes=CHECK %s
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %}
; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}

declare void @llvm.nvvm.tcgen05.fence.before.thread.sync()
declare void @llvm.nvvm.tcgen05.fence.after.thread.sync()
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/NVPTX/tcgen05-ld.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@
; RUN: llc < %s -o - -mcpu=sm_100a -march=nvptx64 -mattr=+ptx86 | FileCheck %s
; RUN: llc < %s -o - -mcpu=sm_101a -march=nvptx64 -mattr=+ptx86 | FileCheck %s
; RUN: llc < %s -o - -mcpu=sm_103a -march=nvptx64 -mattr=+ptx88 | FileCheck %s
; RUN: llc < %s -o - -mcpu=sm_100f -march=nvptx64 -mattr=+ptx88 | FileCheck %s
; RUN: llc < %s -o - -mcpu=sm_110f -march=nvptx64 -mattr=+ptx90 | FileCheck %s
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mattr=+ptx86 -mcpu=sm_100a | %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_101a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mattr=+ptx86 -mcpu=sm_101a | %ptxas-verify -arch=sm_101a %}
; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mattr=+ptx88 -mcpu=sm_103a | %ptxas-verify -arch=sm_103a %}
; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mattr=+ptx88 -mcpu=sm_100f | %ptxas-verify -arch=sm_100f %}
; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mattr=+ptx90 -mcpu=sm_110f | %ptxas-verify -arch=sm_110f %}

; CHECK-LABEL: nvvm_tcgen05_ld_16x64b
define void @nvvm_tcgen05_ld_16x64b(ptr addrspace(6) %taddr) {
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/NVPTX/tcgen05-shift.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | FileCheck --check-prefixes=CHECK %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_110a -mattr=+ptx90 | FileCheck --check-prefixes=CHECK %s
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %}
; RUN: %if ptxas-sm_110a && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110a -mattr=+ptx90 | %ptxas-verify -arch=sm_110a %}

declare void @llvm.nvvm.tcgen05.shift.down.cg1(ptr addrspace(6) %tmem_addr)
declare void @llvm.nvvm.tcgen05.shift.down.cg2(ptr addrspace(6) %tmem_addr)
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/NVPTX/tcgen05-st.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@
; RUN: llc < %s -o - -mcpu=sm_100a -march=nvptx64 -mattr=+ptx86 | FileCheck %s
; RUN: llc < %s -o - -mcpu=sm_101a -march=nvptx64 -mattr=+ptx86 | FileCheck %s
; RUN: llc < %s -o - -mcpu=sm_103a -march=nvptx64 -mattr=+ptx88 | FileCheck %s
; RUN: llc < %s -o - -mcpu=sm_100f -march=nvptx64 -mattr=+ptx88 | FileCheck %s
; RUN: llc < %s -o - -mcpu=sm_110f -march=nvptx64 -mattr=+ptx90 | FileCheck %s
; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %}
; RUN: %if ptxas-sm_101a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_101a -mattr=+ptx86 | %ptxas-verify -arch=sm_101a %}
; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %}
; RUN: %if ptxas-sm_100f && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100f -mattr=+ptx88 | %ptxas-verify -arch=sm_100f %}
; RUN: %if ptxas-sm_110f && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mcpu=sm_110f -mattr=+ptx90 | %ptxas-verify -arch=sm_110f %}

; CHECK-LABEL: nvvm_tcgen05_st_16x64b
define void @nvvm_tcgen05_st_16x64b(ptr addrspace(6) %taddr, i32 %stv1, <2 x i32> %stv2, <4 x i32> %stv4, <8 x i32> %stv8, <16 x i32> %stv16, <32 x i32> %stv32, <64 x i32> %stv64, <128 x i32> %stv128) {
Expand Down
Loading