Skip to content

Commit 5abee2b

Browse files
committed
[SelectionDAG][RISCV] Teach computeKnownBits to use range metadata for atomic_load.
Also teach SelectionDAGBuilder to get the range metadata in visitAtomicLoad. This allows us to recognize that sign-extending a byte load of a boolean value from memory will produce zeros for the extended bits, which in turn allows us to remove an AND on RISC-V. Tests are copied from llvm#136502 with range metadata added to the i1 cases. Some of the test effects overlap with llvm#136502, but that patch can't handle the acquire or seq_cst cases with the Zalasr extension, since we only have sign-extending versions of those loads.
1 parent 0b5a200 commit 5abee2b

File tree

3 files changed

+36
-37
lines changed

3 files changed

+36
-37
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4382,6 +4382,38 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
43824382
Known.Zero |= APInt::getBitsSetFrom(BitWidth, VT.getScalarSizeInBits());
43834383
break;
43844384
}
4385+
case ISD::ATOMIC_LOAD: {
4386+
// If we are looking at the loaded value.
4387+
if (Op.getResNo() == 0) {
4388+
auto *AT = cast<AtomicSDNode>(Op);
4389+
unsigned ScalarMemorySize = AT->getMemoryVT().getScalarSizeInBits();
4390+
KnownBits KnownScalarMemory(ScalarMemorySize);
4391+
if (const MDNode *MD = AT->getRanges())
4392+
computeKnownBitsFromRangeMetadata(*MD, KnownScalarMemory);
4393+
4394+
switch (AT->getExtensionType()) {
4395+
case ISD::ZEXTLOAD:
4396+
Known = KnownScalarMemory.zext(BitWidth);
4397+
break;
4398+
case ISD::SEXTLOAD:
4399+
Known = KnownScalarMemory.sext(BitWidth);
4400+
break;
4401+
case ISD::EXTLOAD:
4402+
if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
4403+
Known = KnownScalarMemory.zext(BitWidth);
4404+
else if (TLI->getExtendForAtomicOps() == ISD::SIGN_EXTEND)
4405+
Known = KnownScalarMemory.sext(BitWidth);
4406+
else
4407+
Known = KnownScalarMemory.anyext(BitWidth);
4408+
break;
4409+
case ISD::NON_EXTLOAD:
4410+
Known = KnownScalarMemory;
4411+
break;
4412+
}
4413+
assert(Known.getBitWidth() == BitWidth);
4414+
}
4415+
break;
4416+
}
43854417
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
43864418
if (Op.getResNo() == 1) {
43874419
// The boolean result conforms to getBooleanContents.
@@ -4407,21 +4439,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
44074439
case ISD::ATOMIC_LOAD_MIN:
44084440
case ISD::ATOMIC_LOAD_MAX:
44094441
case ISD::ATOMIC_LOAD_UMIN:
4410-
case ISD::ATOMIC_LOAD_UMAX:
4411-
case ISD::ATOMIC_LOAD: {
4442+
case ISD::ATOMIC_LOAD_UMAX: {
44124443
// If we are looking at the loaded value.
44134444
if (Op.getResNo() == 0) {
44144445
auto *AT = cast<AtomicSDNode>(Op);
44154446
unsigned MemBits = AT->getMemoryVT().getScalarSizeInBits();
44164447

4417-
// For atomic_load, prefer to use the extension type.
4418-
if (Op->getOpcode() == ISD::ATOMIC_LOAD) {
4419-
if (AT->getExtensionType() == ISD::ZEXTLOAD)
4420-
Known.Zero.setBitsFrom(MemBits);
4421-
else if (AT->getExtensionType() != ISD::SEXTLOAD &&
4422-
TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
4423-
Known.Zero.setBitsFrom(MemBits);
4424-
} else if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
4448+
if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
44254449
Known.Zero.setBitsFrom(MemBits);
44264450
}
44274451
break;

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5150,9 +5150,10 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
51505150

51515151
auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
51525152

5153+
const MDNode *Ranges = getRangeMetadata(I);
51535154
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
51545155
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
5155-
I.getAlign(), AAMDNodes(), nullptr, SSID, Order);
5156+
I.getAlign(), AAMDNodes(), Ranges, SSID, Order);
51565157

51575158
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
51585159

llvm/test/CodeGen/RISCV/atomic-load-zext.ll

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind {
4848
; RV32IA-LABEL: atomic_load_i1_unordered:
4949
; RV32IA: # %bb.0:
5050
; RV32IA-NEXT: lb a0, 0(a0)
51-
; RV32IA-NEXT: zext.b a0, a0
5251
; RV32IA-NEXT: ret
5352
;
5453
; RV64I-LABEL: atomic_load_i1_unordered:
@@ -65,7 +64,6 @@ define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind {
6564
; RV64IA-LABEL: atomic_load_i1_unordered:
6665
; RV64IA: # %bb.0:
6766
; RV64IA-NEXT: lb a0, 0(a0)
68-
; RV64IA-NEXT: zext.b a0, a0
6967
; RV64IA-NEXT: ret
7068
%1 = load atomic i8, ptr %a unordered, align 1, !range !0, !noundef !1
7169
%2 = trunc nuw i8 %1 to i1
@@ -87,7 +85,6 @@ define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind {
8785
; RV32IA-LABEL: atomic_load_i1_monotonic:
8886
; RV32IA: # %bb.0:
8987
; RV32IA-NEXT: lb a0, 0(a0)
90-
; RV32IA-NEXT: zext.b a0, a0
9188
; RV32IA-NEXT: ret
9289
;
9390
; RV64I-LABEL: atomic_load_i1_monotonic:
@@ -104,7 +101,6 @@ define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind {
104101
; RV64IA-LABEL: atomic_load_i1_monotonic:
105102
; RV64IA: # %bb.0:
106103
; RV64IA-NEXT: lb a0, 0(a0)
107-
; RV64IA-NEXT: zext.b a0, a0
108104
; RV64IA-NEXT: ret
109105
%1 = load atomic i8, ptr %a monotonic, align 1, !range !0, !noundef !1
110106
%2 = trunc nuw i8 %1 to i1
@@ -127,13 +123,11 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
127123
; RV32IA-WMO: # %bb.0:
128124
; RV32IA-WMO-NEXT: lb a0, 0(a0)
129125
; RV32IA-WMO-NEXT: fence r, rw
130-
; RV32IA-WMO-NEXT: zext.b a0, a0
131126
; RV32IA-WMO-NEXT: ret
132127
;
133128
; RV32IA-TSO-LABEL: atomic_load_i1_acquire:
134129
; RV32IA-TSO: # %bb.0:
135130
; RV32IA-TSO-NEXT: lb a0, 0(a0)
136-
; RV32IA-TSO-NEXT: zext.b a0, a0
137131
; RV32IA-TSO-NEXT: ret
138132
;
139133
; RV64I-LABEL: atomic_load_i1_acquire:
@@ -151,63 +145,53 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
151145
; RV64IA-WMO: # %bb.0:
152146
; RV64IA-WMO-NEXT: lb a0, 0(a0)
153147
; RV64IA-WMO-NEXT: fence r, rw
154-
; RV64IA-WMO-NEXT: zext.b a0, a0
155148
; RV64IA-WMO-NEXT: ret
156149
;
157150
; RV64IA-TSO-LABEL: atomic_load_i1_acquire:
158151
; RV64IA-TSO: # %bb.0:
159152
; RV64IA-TSO-NEXT: lb a0, 0(a0)
160-
; RV64IA-TSO-NEXT: zext.b a0, a0
161153
; RV64IA-TSO-NEXT: ret
162154
;
163155
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
164156
; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
165157
; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
166158
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
167-
; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
168159
; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
169160
;
170161
; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
171162
; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
172163
; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
173-
; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
174164
; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
175165
;
176166
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
177167
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
178168
; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
179169
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
180-
; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
181170
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
182171
;
183172
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
184173
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
185174
; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
186-
; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
187175
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
188176
;
189177
; RV32IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
190178
; RV32IA-ZALASR-WMO: # %bb.0:
191179
; RV32IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
192-
; RV32IA-ZALASR-WMO-NEXT: zext.b a0, a0
193180
; RV32IA-ZALASR-WMO-NEXT: ret
194181
;
195182
; RV32IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
196183
; RV32IA-ZALASR-TSO: # %bb.0:
197184
; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
198-
; RV32IA-ZALASR-TSO-NEXT: zext.b a0, a0
199185
; RV32IA-ZALASR-TSO-NEXT: ret
200186
;
201187
; RV64IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
202188
; RV64IA-ZALASR-WMO: # %bb.0:
203189
; RV64IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
204-
; RV64IA-ZALASR-WMO-NEXT: zext.b a0, a0
205190
; RV64IA-ZALASR-WMO-NEXT: ret
206191
;
207192
; RV64IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
208193
; RV64IA-ZALASR-TSO: # %bb.0:
209194
; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
210-
; RV64IA-ZALASR-TSO-NEXT: zext.b a0, a0
211195
; RV64IA-ZALASR-TSO-NEXT: ret
212196
%1 = load atomic i8, ptr %a acquire, align 1, !range !0, !noundef !1
213197
%2 = trunc nuw i8 %1 to i1
@@ -230,15 +214,13 @@ define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
230214
; RV32IA-WMO: # %bb.0:
231215
; RV32IA-WMO-NEXT: fence rw, rw
232216
; RV32IA-WMO-NEXT: lb a0, 0(a0)
233-
; RV32IA-WMO-NEXT: zext.b a0, a0
234217
; RV32IA-WMO-NEXT: fence r, rw
235218
; RV32IA-WMO-NEXT: ret
236219
;
237220
; RV32IA-TSO-LABEL: atomic_load_i1_seq_cst:
238221
; RV32IA-TSO: # %bb.0:
239222
; RV32IA-TSO-NEXT: fence rw, rw
240223
; RV32IA-TSO-NEXT: lb a0, 0(a0)
241-
; RV32IA-TSO-NEXT: zext.b a0, a0
242224
; RV32IA-TSO-NEXT: ret
243225
;
244226
; RV64I-LABEL: atomic_load_i1_seq_cst:
@@ -256,57 +238,49 @@ define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
256238
; RV64IA-WMO: # %bb.0:
257239
; RV64IA-WMO-NEXT: fence rw, rw
258240
; RV64IA-WMO-NEXT: lb a0, 0(a0)
259-
; RV64IA-WMO-NEXT: zext.b a0, a0
260241
; RV64IA-WMO-NEXT: fence r, rw
261242
; RV64IA-WMO-NEXT: ret
262243
;
263244
; RV64IA-TSO-LABEL: atomic_load_i1_seq_cst:
264245
; RV64IA-TSO: # %bb.0:
265246
; RV64IA-TSO-NEXT: fence rw, rw
266247
; RV64IA-TSO-NEXT: lb a0, 0(a0)
267-
; RV64IA-TSO-NEXT: zext.b a0, a0
268248
; RV64IA-TSO-NEXT: ret
269249
;
270250
; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
271251
; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
272252
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
273253
; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
274-
; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
275254
; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
276255
; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
277256
;
278257
; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
279258
; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
280259
; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
281260
; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
282-
; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
283261
; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
284262
;
285263
; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
286264
; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
287265
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
288266
; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
289-
; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
290267
; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
291268
; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
292269
;
293270
; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
294271
; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
295272
; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
296273
; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
297-
; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
298274
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
299275
;
300276
; RV32IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
301277
; RV32IA-ZALASR: # %bb.0:
302278
; RV32IA-ZALASR-NEXT: lb.aq a0, (a0)
303-
; RV32IA-ZALASR-NEXT: zext.b a0, a0
304279
; RV32IA-ZALASR-NEXT: ret
305280
;
306281
; RV64IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
307282
; RV64IA-ZALASR: # %bb.0:
308283
; RV64IA-ZALASR-NEXT: lb.aq a0, (a0)
309-
; RV64IA-ZALASR-NEXT: zext.b a0, a0
310284
; RV64IA-ZALASR-NEXT: ret
311285
%1 = load atomic i8, ptr %a seq_cst, align 1, !range !0, !noundef !1
312286
%2 = trunc nuw i8 %1 to i1

0 commit comments

Comments
 (0)