Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 22 additions & 10 deletions llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1339,20 +1339,18 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
SDValue Offset, Base;
SelectADDR(ST->getBasePtr(), Base, Offset);

SDValue Ops[] = {Value,
SDValue Ops[] = {selectPossiblyImm(Value),
getI32Imm(Ordering, DL),
getI32Imm(Scope, DL),
getI32Imm(CodeAddrSpace, DL),
getI32Imm(NVPTX::PTXLdStInstCode::Untyped, DL),
getI32Imm(ToTypeWidth, DL),
Base,
Offset,
Chain};

const MVT::SimpleValueType SourceVT =
Value.getNode()->getSimpleValueType(0).SimpleTy;
const std::optional<unsigned> Opcode = pickOpcodeForVT(
SourceVT, NVPTX::ST_i8, NVPTX::ST_i16, NVPTX::ST_i32, NVPTX::ST_i64);
const std::optional<unsigned> Opcode =
pickOpcodeForVT(Value.getSimpleValueType().SimpleTy, NVPTX::ST_i8,
NVPTX::ST_i16, NVPTX::ST_i32, NVPTX::ST_i64);
if (!Opcode)
return false;

Expand Down Expand Up @@ -1389,7 +1387,9 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {

const unsigned NumElts = getLoadStoreVectorNumElts(ST);

SmallVector<SDValue, 16> Ops(ST->ops().slice(1, NumElts));
SmallVector<SDValue, 16> Ops;
for (auto &V : ST->ops().slice(1, NumElts))
Ops.push_back(selectPossiblyImm(V));
SDValue Addr = N->getOperand(NumElts + 1);
const unsigned ToTypeWidth = TotalWidth / NumElts;

Expand All @@ -1400,9 +1400,8 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
SelectADDR(Addr, Base, Offset);

Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
getI32Imm(CodeAddrSpace, DL),
getI32Imm(NVPTX::PTXLdStInstCode::Untyped, DL),
getI32Imm(ToTypeWidth, DL), Base, Offset, Chain});
getI32Imm(CodeAddrSpace, DL), getI32Imm(ToTypeWidth, DL), Base,
Offset, Chain});

const MVT::SimpleValueType EltVT =
ST->getOperand(1).getSimpleValueType().SimpleTy;
Expand Down Expand Up @@ -2102,6 +2101,19 @@ bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base,
return true;
}

/// Convert \p V into a target-constant operand when it is a constant.
///
/// A BITCAST at the top of \p V is looked through first. If the resulting
/// value is an integer or floating-point constant node, the matching target
/// constant (same value, same type as the looked-through node) is returned.
/// Otherwise the looked-through value itself is returned unchanged.
SDValue NVPTXDAGToDAGISel::selectPossiblyImm(SDValue V) {
  // Peel a single bitcast so a constant hidden behind it is still recognised.
  const SDValue Src = V.getOpcode() == ISD::BITCAST ? V.getOperand(0) : V;
  const SDLoc Loc(Src);
  const auto Ty = Src.getValueType();

  if (const auto *IntConst = dyn_cast<ConstantSDNode>(Src))
    return CurDAG->getTargetConstant(IntConst->getAPIntValue(), Loc, Ty);

  if (const auto *FPConst = dyn_cast<ConstantFPSDNode>(Src))
    return CurDAG->getTargetConstantFP(FPConst->getValueAPF(), Loc, Ty);

  // Not a constant: hand back the (possibly un-bitcast) value as-is.
  return Src;
}

bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
unsigned int spN) const {
const Value *Src = nullptr;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
}

bool SelectADDR(SDValue Addr, SDValue &Base, SDValue &Offset);
SDValue selectPossiblyImm(SDValue V);

bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;

Expand Down
63 changes: 38 additions & 25 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,18 @@ class OneUse2<SDPatternOperator operator>
class fpimm_pos_inf<ValueType vt>
: FPImmLeaf<vt, [{ return Imm.isPosInfinity(); }]>;



// Operands which can hold either a Register or an Immediate.
//
// Unfortunately, since most register classes can hold multiple types, RI16,
// RI32 and RI64 must use the 'Any' type. RI1 is the exception and is typed
// as i1.

def RI1 : Operand<i1>;
def RI16 : Operand<Any>;
def RI32 : Operand<Any>;
def RI64 : Operand<Any>;

// Utility class to wrap up information about a register and DAG type for more
// convenient iteration and parameterization
class RegTyInfo<ValueType ty, NVPTXRegClass rc, Operand imm, SDNode imm_node,
Expand Down Expand Up @@ -2276,19 +2288,20 @@ let mayLoad=1, hasSideEffects=0 in {
def LD_i64 : LD<B64>;
}

class ST<NVPTXRegClass regclass>
class ST<DAGOperand O>
: NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Sign, i32imm:$toWidth, ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.${Sign:sign}$toWidth"
(ins O:$src,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$toWidth,
ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.b$toWidth"
" \t[$addr], $src;", []>;

let mayStore=1, hasSideEffects=0 in {
def ST_i8 : ST<B16>;
def ST_i16 : ST<B16>;
def ST_i32 : ST<B32>;
def ST_i64 : ST<B64>;
def ST_i8 : ST<RI16>;
def ST_i16 : ST<RI16>;
def ST_i32 : ST<RI32>;
def ST_i64 : ST<RI64>;
}

// The following is used only in and after vector elementizations. Vector
Expand Down Expand Up @@ -2324,38 +2337,38 @@ let mayLoad=1, hasSideEffects=0 in {
defm LDV_i64 : LD_VEC<B64>;
}

multiclass ST_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
def _v2 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
LdStCode:$addsp, LdStCode:$Sign, i32imm:$fromWidth,
(ins O:$src1, O:$src2,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$fromWidth,
ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.v2.${Sign:sign}$fromWidth "
"st${sem:sem}${scope:scope}${addsp:addsp}.v2.b$fromWidth "
"\t[$addr], {{$src1, $src2}};", []>;
def _v4 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.v4.${Sign:sign}$fromWidth "
(ins O:$src1, O:$src2, O:$src3, O:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$fromWidth,
ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.v4.b$fromWidth "
"\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
if support_v8 then
def _v8 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
regclass:$src5, regclass:$src6, regclass:$src7, regclass:$src8,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Sign,
i32imm:$fromWidth, ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.v8.${Sign:sign}$fromWidth "
(ins O:$src1, O:$src2, O:$src3, O:$src4,
O:$src5, O:$src6, O:$src7, O:$src8,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$fromWidth,
ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.v8.b$fromWidth "
"\t[$addr], "
"{{$src1, $src2, $src3, $src4, $src5, $src6, $src7, $src8}};", []>;
}

let mayStore=1, hasSideEffects=0 in {
defm STV_i8 : ST_VEC<B16>;
defm STV_i16 : ST_VEC<B16>;
defm STV_i32 : ST_VEC<B32, support_v8 = true>;
defm STV_i64 : ST_VEC<B64>;
defm STV_i8 : ST_VEC<RI16>;
defm STV_i16 : ST_VEC<RI16>;
defm STV_i32 : ST_VEC<RI32, support_v8 = true>;
defm STV_i64 : ST_VEC<RI64>;
}

//---- Conversion ----
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/NVPTX/access-non-generic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,7 @@ define void @nested_const_expr() {
; PTX-LABEL: nested_const_expr(
; store 1 to bitcast(gep(addrspacecast(array), 0, 1))
store i32 1, ptr getelementptr ([10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i64 0, i64 1), align 4
; PTX: mov.b32 %r1, 1;
; PTX-NEXT: st.shared.b32 [array+4], %r1;
; PTX: st.shared.b32 [array+4], 1;
ret void
}

Expand Down
18 changes: 18 additions & 0 deletions llvm/test/CodeGen/NVPTX/bf16-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1605,5 +1605,23 @@ define <2 x bfloat> @test_maxnum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
ret <2 x bfloat> %r
}

; Stores a bfloat argument to %p1 and the constant bfloat 1.0 to %p2.
; The CHECK lines expect the argument to be stored from a register, and the
; constant to appear directly as the immediate 0x3F80 in the st.b16
; instruction.
define void @store_bf16(ptr %p1, ptr %p2, bfloat %v) {
; CHECK-LABEL: store_bf16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [store_bf16_param_0];
; CHECK-NEXT: ld.param.b16 %rs1, [store_bf16_param_2];
; CHECK-NEXT: st.b16 [%rd1], %rs1;
; CHECK-NEXT: ld.param.b64 %rd2, [store_bf16_param_1];
; CHECK-NEXT: st.b16 [%rd2], 0x3F80;
; CHECK-NEXT: ret;
store bfloat %v, ptr %p1
store bfloat 1.0, ptr %p2
ret void
}

declare bfloat @llvm.maximum.bf16(bfloat, bfloat)
declare <2 x bfloat> @llvm.maximum.v2bf16(<2 x bfloat>, <2 x bfloat>)
17 changes: 17 additions & 0 deletions llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -723,3 +723,20 @@ define <2 x bfloat> @test_copysign(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
ret <2 x bfloat> %r
}

; Stores a <2 x bfloat> argument to %p1 and the constant vector
; <1.0, 1.0> to %p2. The CHECK lines expect the argument to be stored from a
; 32-bit register, and the constant vector to appear as a single 32-bit
; immediate (1065369472 == 0x3F803F80) in the st.b32 instruction.
define void @test_store_bf16x2(ptr %p1, ptr %p2, <2 x bfloat> %v) {
; CHECK-LABEL: test_store_bf16x2(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_store_bf16x2_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [test_store_bf16x2_param_2];
; CHECK-NEXT: st.b32 [%rd1], %r1;
; CHECK-NEXT: ld.param.b64 %rd2, [test_store_bf16x2_param_1];
; CHECK-NEXT: st.b32 [%rd2], 1065369472;
; CHECK-NEXT: ret;
store <2 x bfloat> %v, ptr %p1
store <2 x bfloat> <bfloat 1.0, bfloat 1.0>, ptr %p2
ret void
}
9 changes: 4 additions & 5 deletions llvm/test/CodeGen/NVPTX/chain-different-as.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@
define i64 @test() nounwind readnone {
; CHECK-LABEL: test(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: mov.b64 %rd1, 1;
; CHECK-NEXT: mov.b64 %rd2, 42;
; CHECK-NEXT: st.b64 [%rd1], %rd2;
; CHECK-NEXT: ld.global.b64 %rd3, [%rd1];
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
; CHECK-NEXT: st.b64 [%rd1], 42;
; CHECK-NEXT: ld.global.b64 %rd2, [%rd1];
; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT: ret;
%addr0 = inttoptr i64 1 to ptr
%addr1 = inttoptr i64 1 to ptr addrspace(1)
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/NVPTX/demote-vars.ll
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ define void @define_private_global(i64 %val) {
; Also check that the if-then is still here, otherwise we may not be testing
; the "more-than-one-use" part.
; CHECK: st.shared.b64 [private_global_used_more_than_once_in_same_fct],
; CHECK: mov.b64 %[[VAR:.*]], 25
; CHECK: st.shared.b64 [private_global_used_more_than_once_in_same_fct], %[[VAR]]
; CHECK: st.shared.b64 [private_global_used_more_than_once_in_same_fct], 25
define void @define_private_global_more_than_one_use(i64 %val, i1 %cond) {
store i64 %val, ptr addrspace(3) @private_global_used_more_than_once_in_same_fct
br i1 %cond, label %then, label %end
Expand Down
20 changes: 20 additions & 0 deletions llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2295,5 +2295,25 @@ define <2 x half> @test_uitofp_2xi16_to_2xhalf(<2 x i16> %a) #0 {
ret <2 x half> %r
}

; Stores a <2 x half> argument to %p1 and the constant vector <1.0, 1.0> to
; %p2. The CHECK lines expect the argument to be stored from a 32-bit
; register, and the constant vector to appear as a single 32-bit immediate
; (1006648320 == 0x3C003C00) in the st.b32 instruction.
define void @test_store_2xhalf(ptr %p1, ptr %p2, <2 x half> %v) {
; CHECK-LABEL: test_store_2xhalf(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_store_2xhalf_param_2];
; CHECK-NEXT: ld.param.b64 %rd2, [test_store_2xhalf_param_1];
; CHECK-NEXT: ld.param.b64 %rd1, [test_store_2xhalf_param_0];
; CHECK-NEXT: st.b32 [%rd1], %r1;
; CHECK-NEXT: st.b32 [%rd2], 1006648320;
; CHECK-NEXT: ret;
store <2 x half> %v, ptr %p1
store <2 x half> <half 1.0, half 1.0>, ptr %p2
ret void
}



attributes #0 = { nounwind }
attributes #1 = { "unsafe-fp-math" = "true" }
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/NVPTX/i1-load-lower.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@ target triple = "nvptx-nvidia-cuda"
define void @foo() {
; CHECK-LABEL: foo(
; CHECK: .reg .pred %p<2>;
; CHECK: .reg .b16 %rs<4>;
; CHECK: .reg .b16 %rs<3>;
; CHECK-EMPTY:
; CHECK: ld.global.b8 %rs1, [i1g];
; CHECK: and.b16 %rs2, %rs1, 1;
; CHECK: setp.ne.b16 %p1, %rs2, 0;
; CHECK: @%p1 bra $L__BB0_2;
; CHECK: mov.b16 %rs3, 1;
; CHECK: st.global.b8 [i1g], %rs3;
; CHECK: st.global.b8 [i1g], 1;
; CHECK: ret;
%tmp = load i1, ptr addrspace(1) @i1g, align 2
br i1 %tmp, label %if.end, label %if.then
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/NVPTX/i128-ld-st.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ define i128 @foo(ptr %p, ptr %o) {
; CHECK-NEXT: ld.param.b64 %rd2, [foo_param_1];
; CHECK-NEXT: ld.param.b64 %rd1, [foo_param_0];
; CHECK-NEXT: ld.b8 %rd3, [%rd1];
; CHECK-NEXT: st.v2.b64 [%rd2], {%rd3, 0};
; CHECK-NEXT: mov.b64 %rd4, 0;
; CHECK-NEXT: st.v2.b64 [%rd2], {%rd3, %rd4};
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd4};
; CHECK-NEXT: ret;
%c = load i8, ptr %p, align 1
Expand Down
14 changes: 5 additions & 9 deletions llvm/test/CodeGen/NVPTX/jump-table.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ define void @foo(i32 %i) {
; CHECK-LABEL: foo(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b32 %r2, [foo_param_0];
Expand All @@ -24,20 +24,16 @@ define void @foo(i32 %i) {
; CHECK-NEXT: $L__BB0_5;
; CHECK-NEXT: brx.idx %r2, $L_brx_0;
; CHECK-NEXT: $L__BB0_2: // %case0
; CHECK-NEXT: mov.b32 %r6, 0;
; CHECK-NEXT: st.global.b32 [out], %r6;
; CHECK-NEXT: st.global.b32 [out], 0;
; CHECK-NEXT: bra.uni $L__BB0_6;
; CHECK-NEXT: $L__BB0_4: // %case2
; CHECK-NEXT: mov.b32 %r4, 2;
; CHECK-NEXT: st.global.b32 [out], %r4;
; CHECK-NEXT: st.global.b32 [out], 2;
; CHECK-NEXT: bra.uni $L__BB0_6;
; CHECK-NEXT: $L__BB0_5: // %case3
; CHECK-NEXT: mov.b32 %r3, 3;
; CHECK-NEXT: st.global.b32 [out], %r3;
; CHECK-NEXT: st.global.b32 [out], 3;
; CHECK-NEXT: bra.uni $L__BB0_6;
; CHECK-NEXT: $L__BB0_3: // %case1
; CHECK-NEXT: mov.b32 %r5, 1;
; CHECK-NEXT: st.global.b32 [out], %r5;
; CHECK-NEXT: st.global.b32 [out], 1;
; CHECK-NEXT: $L__BB0_6: // %end
; CHECK-NEXT: ret;
entry:
Expand Down
13 changes: 5 additions & 8 deletions llvm/test/CodeGen/NVPTX/local-stack-frame.ll
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ define void @foo4() {
; PTX32-NEXT: .local .align 4 .b8 __local_depot3[8];
; PTX32-NEXT: .reg .b32 %SP;
; PTX32-NEXT: .reg .b32 %SPL;
; PTX32-NEXT: .reg .b32 %r<6>;
; PTX32-NEXT: .reg .b32 %r<5>;
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
; PTX32-NEXT: mov.b32 %SPL, __local_depot3;
Expand All @@ -145,9 +145,8 @@ define void @foo4() {
; PTX32-NEXT: add.u32 %r2, %SPL, 0;
; PTX32-NEXT: add.u32 %r3, %SP, 4;
; PTX32-NEXT: add.u32 %r4, %SPL, 4;
; PTX32-NEXT: mov.b32 %r5, 0;
; PTX32-NEXT: st.local.b32 [%r2], %r5;
; PTX32-NEXT: st.local.b32 [%r4], %r5;
; PTX32-NEXT: st.local.b32 [%r2], 0;
; PTX32-NEXT: st.local.b32 [%r4], 0;
; PTX32-NEXT: { // callseq 1, 0
; PTX32-NEXT: .param .b32 param0;
; PTX32-NEXT: st.param.b32 [param0], %r1;
Expand All @@ -165,7 +164,6 @@ define void @foo4() {
; PTX64-NEXT: .local .align 4 .b8 __local_depot3[8];
; PTX64-NEXT: .reg .b64 %SP;
; PTX64-NEXT: .reg .b64 %SPL;
; PTX64-NEXT: .reg .b32 %r<2>;
; PTX64-NEXT: .reg .b64 %rd<5>;
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
Expand All @@ -175,9 +173,8 @@ define void @foo4() {
; PTX64-NEXT: add.u64 %rd2, %SPL, 0;
; PTX64-NEXT: add.u64 %rd3, %SP, 4;
; PTX64-NEXT: add.u64 %rd4, %SPL, 4;
; PTX64-NEXT: mov.b32 %r1, 0;
; PTX64-NEXT: st.local.b32 [%rd2], %r1;
; PTX64-NEXT: st.local.b32 [%rd4], %r1;
; PTX64-NEXT: st.local.b32 [%rd2], 0;
; PTX64-NEXT: st.local.b32 [%rd4], 0;
; PTX64-NEXT: { // callseq 1, 0
; PTX64-NEXT: .param .b64 param0;
; PTX64-NEXT: st.param.b64 [param0], %rd1;
Expand Down
Loading
Loading