Skip to content

Commit 8f33b13

Browse files
committed
v2i8
1 parent d13098b commit 8f33b13

File tree

4 files changed

+28
-48
lines changed

4 files changed

+28
-48
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
370370
} else if (EltVT.getSimpleVT() == MVT::i8 && NumElts == 2) {
371371
// v2i8 is kept packed and passed as a single v2i8 value
372372
NumElts = 1;
373-
EltVT = MVT::v2i16;
373+
EltVT = MVT::v2i8;
374374
}
375375
for (unsigned j = 0; j != NumElts; ++j) {
376376
ValueVTs.push_back(EltVT);
@@ -3373,10 +3373,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
33733373
}
33743374
InVals.push_back(P);
33753375
} else {
3376-
bool aggregateIsPacked = false;
3377-
if (StructType *STy = dyn_cast<StructType>(Ty))
3378-
aggregateIsPacked = STy->isPacked();
3379-
33803376
SmallVector<EVT, 16> VTs;
33813377
SmallVector<uint64_t, 16> Offsets;
33823378
ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0);
@@ -3403,14 +3399,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
34033399
SDValue VecAddr = DAG.getObjectPtrOffset(
34043400
dl, ArgSymbol, TypeSize::getFixed(Offsets[I]));
34053401

3406-
const MaybeAlign PartAlign = [&]() -> MaybeAlign {
3407-
if (aggregateIsPacked)
3408-
return Align(1);
3409-
if (NumElts != 1)
3410-
return std::nullopt;
3411-
Align PartAlign = DAG.getEVTAlign(EltVT);
3412-
return commonAlignment(PartAlign, Offsets[I]);
3413-
}();
3402+
const MaybeAlign PartAlign = commonAlignment(ArgAlign, Offsets[I]);
34143403
SDValue P =
34153404
DAG.getLoad(VecVT, dl, Root, VecAddr,
34163405
MachinePointerInfo(ADDRESS_SPACE_PARAM), PartAlign,
@@ -3422,14 +3411,14 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
34223411
SDValue Elt = DAG.getNode(LoadVT.isVector() ? ISD::EXTRACT_SUBVECTOR
34233412
: ISD::EXTRACT_VECTOR_ELT,
34243413
dl, LoadVT, P,
3425-
DAG.getIntPtrConstant(J * PackingAmt, dl));
3414+
DAG.getVectorIdxConstant(J * PackingAmt, dl));
34263415

34273416
// Extend or truncate the element if necessary (e.g. an i8 is loaded
34283417
// into an i16 register)
34293418
const EVT ExpactedVT = ArgIns[I + J].VT;
3430-
assert((Elt.getValueType().bitsEq(ExpactedVT) ||
3431-
(ExpactedVT.isScalarInteger() &&
3432-
Elt.getValueType().isScalarInteger())) &&
3419+
assert((Elt.getValueType() == ExpactedVT ||
3420+
(ExpactedVT.isInteger() &&
3421+
Elt.getValueType().isInteger())) &&
34333422
"Non-integer argument type size mismatch");
34343423
if (ExpactedVT.bitsGT(Elt.getValueType()))
34353424
Elt = DAG.getNode(getExtOpcode(ArgIns[I + J].Flags), dl, ExpactedVT,

llvm/test/CodeGen/NVPTX/i8x2-instructions.ll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,15 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
1313
define i16 @test_bitcast_2xi8_i16(<2 x i8> %a) {
1414
; CHECK-LABEL: test_bitcast_2xi8_i16(
1515
; CHECK: {
16-
; CHECK-NEXT: .reg .b16 %rs<6>;
16+
; CHECK-NEXT: .reg .b16 %rs<5>;
1717
; CHECK-NEXT: .reg .b32 %r<3>;
1818
; CHECK-EMPTY:
1919
; CHECK-NEXT: // %bb.0:
20-
; CHECK-NEXT: ld.param.b32 %r1, [test_bitcast_2xi8_i16_param_0];
21-
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
20+
; CHECK-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0];
21+
; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
2222
; CHECK-NEXT: shl.b16 %rs3, %rs2, 8;
23-
; CHECK-NEXT: and.b16 %rs4, %rs1, 255;
24-
; CHECK-NEXT: or.b16 %rs5, %rs4, %rs3;
25-
; CHECK-NEXT: cvt.u32.u16 %r2, %rs5;
23+
; CHECK-NEXT: or.b16 %rs4, %rs1, %rs3;
24+
; CHECK-NEXT: cvt.u32.u16 %r2, %rs4;
2625
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
2726
; CHECK-NEXT: ret;
2827
%res = bitcast <2 x i8> %a to i16

llvm/test/CodeGen/NVPTX/param-add.ll

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,33 +14,24 @@ declare i32 @callee(%struct.1float %a)
1414
define i32 @test(%struct.1float alignstack(32) %data) {
1515
; CHECK-LABEL: test(
1616
; CHECK: {
17-
; CHECK-NEXT: .reg .b32 %r<16>;
17+
; CHECK-NEXT: .reg .b32 %r<7>;
1818
; CHECK-EMPTY:
1919
; CHECK-NEXT: // %bb.0:
20-
; CHECK-NEXT: ld.param.b8 %r1, [test_param_0+1];
21-
; CHECK-NEXT: shl.b32 %r2, %r1, 8;
22-
; CHECK-NEXT: ld.param.b8 %r3, [test_param_0];
23-
; CHECK-NEXT: or.b32 %r4, %r2, %r3;
24-
; CHECK-NEXT: ld.param.b8 %r5, [test_param_0+3];
25-
; CHECK-NEXT: shl.b32 %r6, %r5, 8;
26-
; CHECK-NEXT: ld.param.b8 %r7, [test_param_0+2];
27-
; CHECK-NEXT: or.b32 %r8, %r6, %r7;
28-
; CHECK-NEXT: shl.b32 %r9, %r8, 16;
29-
; CHECK-NEXT: or.b32 %r10, %r9, %r4;
30-
; CHECK-NEXT: shr.u32 %r11, %r10, 8;
31-
; CHECK-NEXT: shr.u32 %r12, %r10, 16;
32-
; CHECK-NEXT: shr.u32 %r13, %r10, 24;
20+
; CHECK-NEXT: ld.param.b32 %r1, [test_param_0];
21+
; CHECK-NEXT: shr.u32 %r2, %r1, 8;
22+
; CHECK-NEXT: shr.u32 %r3, %r1, 16;
23+
; CHECK-NEXT: shr.u32 %r4, %r1, 24;
3324
; CHECK-NEXT: { // callseq 0, 0
3425
; CHECK-NEXT: .param .align 1 .b8 param0[4];
35-
; CHECK-NEXT: st.param.b8 [param0], %r10;
36-
; CHECK-NEXT: st.param.b8 [param0+1], %r11;
37-
; CHECK-NEXT: st.param.b8 [param0+2], %r12;
38-
; CHECK-NEXT: st.param.b8 [param0+3], %r13;
26+
; CHECK-NEXT: st.param.b8 [param0], %r1;
27+
; CHECK-NEXT: st.param.b8 [param0+1], %r2;
28+
; CHECK-NEXT: st.param.b8 [param0+2], %r3;
29+
; CHECK-NEXT: st.param.b8 [param0+3], %r4;
3930
; CHECK-NEXT: .param .b32 retval0;
4031
; CHECK-NEXT: call.uni (retval0), callee, (param0);
4132
; CHECK-NEXT: ld.param.b32 %r14, [retval0];
4233
; CHECK-NEXT: } // callseq 0
43-
; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
34+
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
4435
; CHECK-NEXT: ret;
4536

4637
%1 = call i32 @callee(%struct.1float %data)

llvm/test/CodeGen/NVPTX/shift-opt.ll

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,16 +70,17 @@ define i64 @test_and(i64 %x, i32 %y) {
7070
define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
7171
; CHECK-LABEL: test_vec(
7272
; CHECK: {
73-
; CHECK-NEXT: .reg .b16 %rs<5>;
73+
; CHECK-NEXT: .reg .b16 %rs<7>;
7474
; CHECK-NEXT: .reg .b32 %r<5>;
7575
; CHECK-EMPTY:
7676
; CHECK-NEXT: // %bb.0:
77-
; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_vec_param_0];
78-
; CHECK-NEXT: ld.param.b32 %r1, [test_vec_param_1];
77+
; CHECK-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_vec_param_1];
78+
; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
7979
; CHECK-NEXT: and.b32 %r2, %r1, 16711935;
80-
; CHECK-NEXT: shr.u16 %rs3, %rs2, 5;
81-
; CHECK-NEXT: shr.u16 %rs4, %rs1, 5;
82-
; CHECK-NEXT: mov.b32 %r3, {%rs4, %rs3};
80+
; CHECK-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_vec_param_0];
81+
; CHECK-NEXT: shr.u16 %rs5, %rs4, 5;
82+
; CHECK-NEXT: shr.u16 %rs6, %rs3, 5;
83+
; CHECK-NEXT: mov.b32 %r3, {%rs6, %rs5};
8384
; CHECK-NEXT: or.b32 %r4, %r3, %r2;
8485
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
8586
; CHECK-NEXT: ret;

0 commit comments

Comments
 (0)