Skip to content

Commit 29e82fe

Browse files
committed
Use std::optional and update test
1 parent 656fa52 commit 29e82fe

File tree

3 files changed

+178
-41
lines changed

3 files changed

+178
-41
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2472,27 +2472,27 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
24722472
return false;
24732473
}
24742474

2475-
bool NVPTXDAGToDAGISel::FindRootAddressAndTotalOffset(
2476-
SDValue Addr, SDValue &Base, uint64_t &AccumulatedOffset) {
2475+
std::optional<uint64_t>
2476+
NVPTXDAGToDAGISel::FindRootAddressAndTotalOffset(SDValue Addr, SDValue &Base,
2477+
uint64_t AccumulatedOffset) {
24772478
if (Addr.getOpcode() == ISD::ADD) {
24782479
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
24792480
SDValue base = Addr.getOperand(0);
24802481
AccumulatedOffset += CN->getZExtValue();
24812482
if (SelectDirectAddr(base, Base))
2482-
return true;
2483+
return AccumulatedOffset;
24832484
return FindRootAddressAndTotalOffset(base, Base, AccumulatedOffset);
24842485
}
24852486
}
2486-
return false;
2487+
return std::nullopt;
24872488
}
24882489

24892490
// symbol+offset
24902491
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
24912492
SDValue &Base, SDValue &Offset,
24922493
MVT mvt) {
2493-
uint64_t AccumulatedOffset = 0;
2494-
if (FindRootAddressAndTotalOffset(Addr, Base, AccumulatedOffset)) {
2495-
Offset = CurDAG->getTargetConstant(AccumulatedOffset, SDLoc(OpNode), mvt);
2494+
if (auto AccumulatedOffset = FindRootAddressAndTotalOffset(Addr, Base, 0)) {
2495+
Offset = CurDAG->getTargetConstant(*AccumulatedOffset, SDLoc(OpNode), mvt);
24962496
return true;
24972497
}
24982498
return false;

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,9 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
9595
void SelectCpAsyncBulkTensorPrefetchCommon(SDNode *N, bool IsIm2Col = false);
9696
void SelectCpAsyncBulkTensorReduceCommon(SDNode *N, unsigned RedOp,
9797
bool IsIm2Col = false);
98-
bool FindRootAddressAndTotalOffset(SDValue Addr, SDValue &Base,
99-
uint64_t &AccumulatedOffset);
98+
std::optional<uint64_t>
99+
FindRootAddressAndTotalOffset(SDValue Addr, SDValue &Base,
100+
uint64_t AccumulatedOffset);
100101

101102
inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
102103
return CurDAG->getTargetConstant(Imm, DL, MVT::i32);

llvm/test/CodeGen/NVPTX/param-add.ll

Lines changed: 168 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,179 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc < %s -march=nvptx64 --debug-counter=dagcombine=0 | FileCheck %s
23
; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
34

5+
; REQUIRES: asserts
6+
; asserts are required for --debug-counter=dagcombine=0 to have the intended
7+
; effect of disabling DAG combines, which exposes the bug. When combines are
8+
; enabled the bug does not occur.
9+
410
%struct.8float = type <{ [8 x float] }>
511

612
declare i32 @callee(%struct.8float %a)
713

814
define i32 @test(%struct.8float alignstack(32) %data) {
9-
;CHECK-NOT: add.
10-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0];
11-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+1];
12-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+2];
13-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+3];
14-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+4];
15-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+5];
16-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+6];
17-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+7];
18-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+8];
19-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+9];
20-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+10];
21-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+11];
22-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+12];
23-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+13];
24-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+14];
25-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+15];
26-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+16];
27-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+17];
28-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+18];
29-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+19];
30-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+20];
31-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+21];
32-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+22];
33-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+23];
34-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+24];
35-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+26];
36-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+27];
37-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+28];
38-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+29];
39-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+30];
40-
;CHECK-DAG: ld.param.u8 %r{{.*}}, [test_param_0+31];
15+
; CHECK-LABEL: test(
16+
; CHECK: {
17+
; CHECK-NEXT: .reg .b32 %r<123>;
18+
; CHECK-NEXT: .reg .f32 %f<9>;
19+
; CHECK-EMPTY:
20+
; CHECK-NEXT: // %bb.0:
21+
; CHECK-NEXT: ld.param.u8 %r1, [test_param_0+29];
22+
; CHECK-NEXT: shl.b32 %r2, %r1, 8;
23+
; CHECK-NEXT: ld.param.u8 %r3, [test_param_0+28];
24+
; CHECK-NEXT: or.b32 %r4, %r2, %r3;
25+
; CHECK-NEXT: ld.param.u8 %r5, [test_param_0+31];
26+
; CHECK-NEXT: shl.b32 %r6, %r5, 8;
27+
; CHECK-NEXT: ld.param.u8 %r7, [test_param_0+30];
28+
; CHECK-NEXT: or.b32 %r8, %r6, %r7;
29+
; CHECK-NEXT: shl.b32 %r9, %r8, 16;
30+
; CHECK-NEXT: or.b32 %r122, %r9, %r4;
31+
; CHECK-NEXT: mov.b32 %f1, %r122;
32+
; CHECK-NEXT: ld.param.u8 %r11, [test_param_0+25];
33+
; CHECK-NEXT: shl.b32 %r12, %r11, 8;
34+
; CHECK-NEXT: ld.param.u8 %r13, [test_param_0+24];
35+
; CHECK-NEXT: or.b32 %r14, %r12, %r13;
36+
; CHECK-NEXT: ld.param.u8 %r15, [test_param_0+27];
37+
; CHECK-NEXT: shl.b32 %r16, %r15, 8;
38+
; CHECK-NEXT: ld.param.u8 %r17, [test_param_0+26];
39+
; CHECK-NEXT: or.b32 %r18, %r16, %r17;
40+
; CHECK-NEXT: shl.b32 %r19, %r18, 16;
41+
; CHECK-NEXT: or.b32 %r121, %r19, %r14;
42+
; CHECK-NEXT: mov.b32 %f2, %r121;
43+
; CHECK-NEXT: ld.param.u8 %r21, [test_param_0+21];
44+
; CHECK-NEXT: shl.b32 %r22, %r21, 8;
45+
; CHECK-NEXT: ld.param.u8 %r23, [test_param_0+20];
46+
; CHECK-NEXT: or.b32 %r24, %r22, %r23;
47+
; CHECK-NEXT: ld.param.u8 %r25, [test_param_0+23];
48+
; CHECK-NEXT: shl.b32 %r26, %r25, 8;
49+
; CHECK-NEXT: ld.param.u8 %r27, [test_param_0+22];
50+
; CHECK-NEXT: or.b32 %r28, %r26, %r27;
51+
; CHECK-NEXT: shl.b32 %r29, %r28, 16;
52+
; CHECK-NEXT: or.b32 %r120, %r29, %r24;
53+
; CHECK-NEXT: mov.b32 %f3, %r120;
54+
; CHECK-NEXT: ld.param.u8 %r31, [test_param_0+17];
55+
; CHECK-NEXT: shl.b32 %r32, %r31, 8;
56+
; CHECK-NEXT: ld.param.u8 %r33, [test_param_0+16];
57+
; CHECK-NEXT: or.b32 %r34, %r32, %r33;
58+
; CHECK-NEXT: ld.param.u8 %r35, [test_param_0+19];
59+
; CHECK-NEXT: shl.b32 %r36, %r35, 8;
60+
; CHECK-NEXT: ld.param.u8 %r37, [test_param_0+18];
61+
; CHECK-NEXT: or.b32 %r38, %r36, %r37;
62+
; CHECK-NEXT: shl.b32 %r39, %r38, 16;
63+
; CHECK-NEXT: or.b32 %r119, %r39, %r34;
64+
; CHECK-NEXT: mov.b32 %f4, %r119;
65+
; CHECK-NEXT: ld.param.u8 %r41, [test_param_0+13];
66+
; CHECK-NEXT: shl.b32 %r42, %r41, 8;
67+
; CHECK-NEXT: ld.param.u8 %r43, [test_param_0+12];
68+
; CHECK-NEXT: or.b32 %r44, %r42, %r43;
69+
; CHECK-NEXT: ld.param.u8 %r45, [test_param_0+15];
70+
; CHECK-NEXT: shl.b32 %r46, %r45, 8;
71+
; CHECK-NEXT: ld.param.u8 %r47, [test_param_0+14];
72+
; CHECK-NEXT: or.b32 %r48, %r46, %r47;
73+
; CHECK-NEXT: shl.b32 %r49, %r48, 16;
74+
; CHECK-NEXT: or.b32 %r118, %r49, %r44;
75+
; CHECK-NEXT: mov.b32 %f5, %r118;
76+
; CHECK-NEXT: ld.param.u8 %r51, [test_param_0+9];
77+
; CHECK-NEXT: shl.b32 %r52, %r51, 8;
78+
; CHECK-NEXT: ld.param.u8 %r53, [test_param_0+8];
79+
; CHECK-NEXT: or.b32 %r54, %r52, %r53;
80+
; CHECK-NEXT: ld.param.u8 %r55, [test_param_0+11];
81+
; CHECK-NEXT: shl.b32 %r56, %r55, 8;
82+
; CHECK-NEXT: ld.param.u8 %r57, [test_param_0+10];
83+
; CHECK-NEXT: or.b32 %r58, %r56, %r57;
84+
; CHECK-NEXT: shl.b32 %r59, %r58, 16;
85+
; CHECK-NEXT: or.b32 %r117, %r59, %r54;
86+
; CHECK-NEXT: mov.b32 %f6, %r117;
87+
; CHECK-NEXT: ld.param.u8 %r61, [test_param_0+5];
88+
; CHECK-NEXT: shl.b32 %r62, %r61, 8;
89+
; CHECK-NEXT: ld.param.u8 %r63, [test_param_0+4];
90+
; CHECK-NEXT: or.b32 %r64, %r62, %r63;
91+
; CHECK-NEXT: ld.param.u8 %r65, [test_param_0+7];
92+
; CHECK-NEXT: shl.b32 %r66, %r65, 8;
93+
; CHECK-NEXT: ld.param.u8 %r67, [test_param_0+6];
94+
; CHECK-NEXT: or.b32 %r68, %r66, %r67;
95+
; CHECK-NEXT: shl.b32 %r69, %r68, 16;
96+
; CHECK-NEXT: or.b32 %r116, %r69, %r64;
97+
; CHECK-NEXT: mov.b32 %f7, %r116;
98+
; CHECK-NEXT: ld.param.u8 %r71, [test_param_0+1];
99+
; CHECK-NEXT: shl.b32 %r72, %r71, 8;
100+
; CHECK-NEXT: ld.param.u8 %r73, [test_param_0];
101+
; CHECK-NEXT: or.b32 %r74, %r72, %r73;
102+
; CHECK-NEXT: ld.param.u8 %r75, [test_param_0+3];
103+
; CHECK-NEXT: shl.b32 %r76, %r75, 8;
104+
; CHECK-NEXT: ld.param.u8 %r77, [test_param_0+2];
105+
; CHECK-NEXT: or.b32 %r78, %r76, %r77;
106+
; CHECK-NEXT: shl.b32 %r79, %r78, 16;
107+
; CHECK-NEXT: or.b32 %r115, %r79, %r74;
108+
; CHECK-NEXT: mov.b32 %f8, %r115;
109+
; CHECK-NEXT: shr.u32 %r82, %r115, 8;
110+
; CHECK-NEXT: shr.u32 %r83, %r115, 16;
111+
; CHECK-NEXT: shr.u32 %r84, %r115, 24;
112+
; CHECK-NEXT: shr.u32 %r86, %r116, 8;
113+
; CHECK-NEXT: shr.u32 %r87, %r116, 16;
114+
; CHECK-NEXT: shr.u32 %r88, %r116, 24;
115+
; CHECK-NEXT: shr.u32 %r90, %r117, 8;
116+
; CHECK-NEXT: shr.u32 %r91, %r117, 16;
117+
; CHECK-NEXT: shr.u32 %r92, %r117, 24;
118+
; CHECK-NEXT: shr.u32 %r94, %r118, 8;
119+
; CHECK-NEXT: shr.u32 %r95, %r118, 16;
120+
; CHECK-NEXT: shr.u32 %r96, %r118, 24;
121+
; CHECK-NEXT: shr.u32 %r98, %r119, 8;
122+
; CHECK-NEXT: shr.u32 %r99, %r119, 16;
123+
; CHECK-NEXT: shr.u32 %r100, %r119, 24;
124+
; CHECK-NEXT: shr.u32 %r102, %r120, 8;
125+
; CHECK-NEXT: shr.u32 %r103, %r120, 16;
126+
; CHECK-NEXT: shr.u32 %r104, %r120, 24;
127+
; CHECK-NEXT: shr.u32 %r106, %r121, 8;
128+
; CHECK-NEXT: shr.u32 %r107, %r121, 16;
129+
; CHECK-NEXT: shr.u32 %r108, %r121, 24;
130+
; CHECK-NEXT: shr.u32 %r110, %r122, 8;
131+
; CHECK-NEXT: shr.u32 %r111, %r122, 16;
132+
; CHECK-NEXT: shr.u32 %r112, %r122, 24;
133+
; CHECK-NEXT: { // callseq 0, 0
134+
; CHECK-NEXT: .param .align 1 .b8 param0[32];
135+
; CHECK-NEXT: st.param.b8 [param0], %r115;
136+
; CHECK-NEXT: st.param.b8 [param0+1], %r82;
137+
; CHECK-NEXT: st.param.b8 [param0+2], %r83;
138+
; CHECK-NEXT: st.param.b8 [param0+3], %r84;
139+
; CHECK-NEXT: st.param.b8 [param0+4], %r116;
140+
; CHECK-NEXT: st.param.b8 [param0+5], %r86;
141+
; CHECK-NEXT: st.param.b8 [param0+6], %r87;
142+
; CHECK-NEXT: st.param.b8 [param0+7], %r88;
143+
; CHECK-NEXT: st.param.b8 [param0+8], %r117;
144+
; CHECK-NEXT: st.param.b8 [param0+9], %r90;
145+
; CHECK-NEXT: st.param.b8 [param0+10], %r91;
146+
; CHECK-NEXT: st.param.b8 [param0+11], %r92;
147+
; CHECK-NEXT: st.param.b8 [param0+12], %r118;
148+
; CHECK-NEXT: st.param.b8 [param0+13], %r94;
149+
; CHECK-NEXT: st.param.b8 [param0+14], %r95;
150+
; CHECK-NEXT: st.param.b8 [param0+15], %r96;
151+
; CHECK-NEXT: st.param.b8 [param0+16], %r119;
152+
; CHECK-NEXT: st.param.b8 [param0+17], %r98;
153+
; CHECK-NEXT: st.param.b8 [param0+18], %r99;
154+
; CHECK-NEXT: st.param.b8 [param0+19], %r100;
155+
; CHECK-NEXT: st.param.b8 [param0+20], %r120;
156+
; CHECK-NEXT: st.param.b8 [param0+21], %r102;
157+
; CHECK-NEXT: st.param.b8 [param0+22], %r103;
158+
; CHECK-NEXT: st.param.b8 [param0+23], %r104;
159+
; CHECK-NEXT: st.param.b8 [param0+24], %r121;
160+
; CHECK-NEXT: st.param.b8 [param0+25], %r106;
161+
; CHECK-NEXT: st.param.b8 [param0+26], %r107;
162+
; CHECK-NEXT: st.param.b8 [param0+27], %r108;
163+
; CHECK-NEXT: st.param.b8 [param0+28], %r122;
164+
; CHECK-NEXT: st.param.b8 [param0+29], %r110;
165+
; CHECK-NEXT: st.param.b8 [param0+30], %r111;
166+
; CHECK-NEXT: st.param.b8 [param0+31], %r112;
167+
; CHECK-NEXT: .param .b32 retval0;
168+
; CHECK-NEXT: call.uni (retval0),
169+
; CHECK-NEXT: callee,
170+
; CHECK-NEXT: (
171+
; CHECK-NEXT: param0
172+
; CHECK-NEXT: );
173+
; CHECK-NEXT: ld.param.b32 %r113, [retval0];
174+
; CHECK-NEXT: } // callseq 0
175+
; CHECK-NEXT: st.param.b32 [func_retval0], %r113;
176+
; CHECK-NEXT: ret;
41177

42178
%1 = call i32 @callee(%struct.8float %data)
43179
ret i32 %1

0 commit comments

Comments
 (0)