Skip to content

Commit 554db43

Browse files
committed
address comments + cleanup
1 parent ee990a9 commit 554db43

File tree

4 files changed

+40
-39
lines changed

4 files changed

+40
-39
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5113,7 +5113,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
51135113
DAG.getConstant(NVT.getSizeInBits() -
51145114
OVT.getSizeInBits(), dl, NVT));
51155115
}
5116-
Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
5116+
Results.push_back(
5117+
DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1, SDNodeFlags::NoWrap));
51175118
break;
51185119
}
51195120
case ISD::CTLZ_ZERO_UNDEF: {

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 12 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -767,8 +767,10 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
767767
setOperationAction({ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX},
768768
{MVT::i16, MVT::i32, MVT::i64}, Legal);
769769

770+
setOperationAction({ISD::CTPOP, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i16,
771+
Promote);
770772
setOperationAction({ISD::CTPOP, ISD::CTLZ}, MVT::i32, Legal);
771-
setOperationAction({ISD::CTPOP, ISD::CTLZ}, {MVT::i16, MVT::i64}, Custom);
773+
setOperationAction({ISD::CTPOP, ISD::CTLZ}, MVT::i64, Custom);
772774

773775
setI16x2OperationAction(ISD::ABS, MVT::v2i16, Legal, Custom);
774776
setI16x2OperationAction(ISD::SMIN, MVT::v2i16, Legal, Custom);
@@ -2743,40 +2745,17 @@ static SDValue LowerIntrinsicVoid(SDValue Op, SelectionDAG &DAG) {
27432745
return Op;
27442746
}
27452747

2746-
static SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2748+
// In PTX 64-bit CTLZ and CTPOP are supported, but they return a 32-bit value.
2749+
// Lower these into a node returning i32, whose result is then zero-extended
2750+
// back to i64.
2751+
static SDValue lowerCTLZCTPOP(SDValue Op, SelectionDAG &DAG) {
27472752
SDValue V = Op->getOperand(0);
2748-
SDLoc DL(Op);
2749-
2750-
if (V.getValueType() == MVT::i16) {
2751-
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V);
2752-
SDValue CT = DAG.getNode(ISD::CTPOP, DL, MVT::i32, Zext);
2753-
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, CT, SDNodeFlags::NoWrap);
2754-
}
2755-
if (V.getValueType() == MVT::i64) {
2756-
SDValue CT = DAG.getNode(ISD::CTPOP, DL, MVT::i32, V);
2757-
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, CT);
2758-
}
2759-
llvm_unreachable("Unexpected CTPOP type to legalize");
2760-
}
2753+
assert(V.getValueType() == MVT::i64 &&
2754+
"Unexpected CTLZ/CTPOP type to legalize");
27612755

2762-
static SDValue lowerCTLZ(SDValue Op, SelectionDAG &DAG) {
2763-
SDValue V = Op->getOperand(0);
27642756
SDLoc DL(Op);
2765-
2766-
if (V.getValueType() == MVT::i16) {
2767-
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V);
2768-
SDValue CT = DAG.getNode(ISD::CTLZ, DL, MVT::i32, Zext);
2769-
SDValue Sub =
2770-
DAG.getNode(ISD::ADD, DL, MVT::i32, CT,
2771-
DAG.getConstant(APInt(32, -16, true), DL, MVT::i32),
2772-
SDNodeFlags::NoSignedWrap);
2773-
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Sub, SDNodeFlags::NoWrap);
2774-
}
2775-
if (V.getValueType() == MVT::i64) {
2776-
SDValue CT = DAG.getNode(ISD::CTLZ, DL, MVT::i32, V);
2777-
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, CT);
2778-
}
2779-
llvm_unreachable("Unexpected CTLZ type to legalize");
2757+
SDValue CT = DAG.getNode(Op->getOpcode(), DL, MVT::i32, V);
2758+
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, CT, SDNodeFlags::NonNeg);
27802759
}
27812760

27822761
SDValue
@@ -2865,9 +2844,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
28652844
// Used only for bf16 on SM80, where we select fma for non-ftz operation
28662845
return PromoteBinOpIfF32FTZ(Op, DAG);
28672846
case ISD::CTPOP:
2868-
return lowerCTPOP(Op, DAG);
28692847
case ISD::CTLZ:
2870-
return lowerCTLZ(Op, DAG);
2848+
return lowerCTLZCTPOP(Op, DAG);
28712849

28722850
default:
28732851
llvm_unreachable("Custom lowering not defined for operation");

llvm/test/CodeGen/NVPTX/ctlz.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,8 @@ define i16 @myctlz_ret16_2(i16 %a) {
130130
; CHECK-EMPTY:
131131
; CHECK-NEXT: // %bb.0:
132132
; CHECK-NEXT: ld.param.u16 %r1, [myctlz_ret16_2_param_0];
133-
; CHECK-NEXT: clz.b32 %r2, %r1;
134-
; CHECK-NEXT: add.s32 %r3, %r2, -16;
133+
; CHECK-NEXT: shl.b32 %r2, %r1, 16;
134+
; CHECK-NEXT: clz.b32 %r3, %r2;
135135
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
136136
; CHECK-NEXT: ret;
137137
%val = call i16 @llvm.ctlz.i16(i16 %a, i1 true) readnone

llvm/test/CodeGen/NVPTX/intrinsics.ll

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s
3-
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
2+
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s --check-prefixes=CHECK,CHECK32
3+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s --check-prefixes=CHECK,CHECK64
44
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify %}
55
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify %}
66

@@ -140,6 +140,28 @@ define i32 @test_popc64_trunc(i64 %a) {
140140
; llvm.ctpop.i16 is implemented by converting to i32, running popc.b32, and
141141
; then converting back to i16.
142142
define void @test_popc16(i16 %a, ptr %b) {
143+
; CHECK32-LABEL: test_popc16(
144+
; CHECK32: {
145+
; CHECK32-NEXT: .reg .b32 %r<4>;
146+
; CHECK32-EMPTY:
147+
; CHECK32-NEXT: // %bb.0:
148+
; CHECK32-NEXT: ld.param.u16 %r1, [test_popc16_param_0];
149+
; CHECK32-NEXT: popc.b32 %r2, %r1;
150+
; CHECK32-NEXT: ld.param.u32 %r3, [test_popc16_param_1];
151+
; CHECK32-NEXT: st.u16 [%r3], %r2;
152+
; CHECK32-NEXT: ret;
153+
;
154+
; CHECK64-LABEL: test_popc16(
155+
; CHECK64: {
156+
; CHECK64-NEXT: .reg .b32 %r<3>;
157+
; CHECK64-NEXT: .reg .b64 %rd<2>;
158+
; CHECK64-EMPTY:
159+
; CHECK64-NEXT: // %bb.0:
160+
; CHECK64-NEXT: ld.param.u16 %r1, [test_popc16_param_0];
161+
; CHECK64-NEXT: popc.b32 %r2, %r1;
162+
; CHECK64-NEXT: ld.param.u64 %rd1, [test_popc16_param_1];
163+
; CHECK64-NEXT: st.u16 [%rd1], %r2;
164+
; CHECK64-NEXT: ret;
143165
%val = call i16 @llvm.ctpop.i16(i16 %a)
144166
store i16 %val, ptr %b
145167
ret void

0 commit comments

Comments
 (0)