Skip to content

Commit 81740e0

Browse files
ReVe1uv and Kane Wang authored
[RISCV][GlobalISel] Legalize and select G_ATOMICRMW_ADD instruction (llvm#153791)
This patch adds legalization and instruction selection support for the `G_ATOMICRMW_ADD` opcode in the RISCV GlobalISel backend. Support for other opcodes will be added in subsequent PRs. Co-authored-by: Kane Wang <[email protected]>
1 parent fadd87e commit 81740e0

File tree

7 files changed

+704
-2
lines changed

7 files changed

+704
-2
lines changed

llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#include "llvm/CodeGen/TargetOpcodes.h"
2727
#include "llvm/CodeGen/ValueTypes.h"
2828
#include "llvm/IR/DerivedTypes.h"
29+
#include "llvm/IR/Intrinsics.h"
30+
#include "llvm/IR/IntrinsicsRISCV.h"
2931
#include "llvm/IR/Type.h"
3032

3133
using namespace llvm;
@@ -692,6 +694,11 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
692694
.customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
693695
typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
694696

697+
getActionDefinitionsBuilder(G_ATOMICRMW_ADD)
698+
.legalFor(ST.hasStdExtA(), {{sXLen, p0}})
699+
.libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
700+
.clampScalar(0, sXLen, sXLen);
701+
695702
getLegacyLegalizerInfo().computeTables();
696703
verify(*ST.getInstrInfo());
697704
}
@@ -729,6 +736,8 @@ bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
729736
MI.eraseFromParent();
730737
return true;
731738
}
739+
case Intrinsic::riscv_masked_atomicrmw_add:
740+
return true;
732741
}
733742
}
734743

Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -mattr=+a,+zabha -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32IA-ZABHA
3+
; RUN: llc -mtriple=riscv32 -mattr=+a -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32IA
4+
; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
5+
; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64IA-ZABHA
6+
; RUN: llc -mtriple=riscv64 -mattr=+a -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64IA
7+
; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
8+
9+
define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
10+
; RV32IA-ZABHA-LABEL: atomicrmw_add_i8:
11+
; RV32IA-ZABHA: # %bb.0:
12+
; RV32IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
13+
; RV32IA-ZABHA-NEXT: ret
14+
;
15+
; RV32IA-LABEL: atomicrmw_add_i8:
16+
; RV32IA: # %bb.0:
17+
; RV32IA-NEXT: li a2, 255
18+
; RV32IA-NEXT: andi a3, a0, -4
19+
; RV32IA-NEXT: andi a0, a0, 3
20+
; RV32IA-NEXT: zext.b a1, a1
21+
; RV32IA-NEXT: slli a0, a0, 3
22+
; RV32IA-NEXT: sll a2, a2, a0
23+
; RV32IA-NEXT: sll a1, a1, a0
24+
; RV32IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
25+
; RV32IA-NEXT: lr.w.aqrl a4, (a3)
26+
; RV32IA-NEXT: add a5, a4, a1
27+
; RV32IA-NEXT: xor a5, a4, a5
28+
; RV32IA-NEXT: and a5, a5, a2
29+
; RV32IA-NEXT: xor a5, a4, a5
30+
; RV32IA-NEXT: sc.w.rl a5, a5, (a3)
31+
; RV32IA-NEXT: bnez a5, .LBB0_1
32+
; RV32IA-NEXT: # %bb.2:
33+
; RV32IA-NEXT: srl a0, a4, a0
34+
; RV32IA-NEXT: ret
35+
;
36+
; RV32-LABEL: atomicrmw_add_i8:
37+
; RV32: # %bb.0:
38+
; RV32-NEXT: addi sp, sp, -16
39+
; RV32-NEXT: .cfi_def_cfa_offset 16
40+
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
41+
; RV32-NEXT: .cfi_offset ra, -4
42+
; RV32-NEXT: li a2, 5
43+
; RV32-NEXT: call __atomic_fetch_add_1
44+
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
45+
; RV32-NEXT: .cfi_restore ra
46+
; RV32-NEXT: addi sp, sp, 16
47+
; RV32-NEXT: .cfi_def_cfa_offset 0
48+
; RV32-NEXT: ret
49+
;
50+
; RV64IA-ZABHA-LABEL: atomicrmw_add_i8:
51+
; RV64IA-ZABHA: # %bb.0:
52+
; RV64IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
53+
; RV64IA-ZABHA-NEXT: ret
54+
;
55+
; RV64IA-LABEL: atomicrmw_add_i8:
56+
; RV64IA: # %bb.0:
57+
; RV64IA-NEXT: li a2, 255
58+
; RV64IA-NEXT: andi a3, a0, -4
59+
; RV64IA-NEXT: andi a0, a0, 3
60+
; RV64IA-NEXT: zext.b a1, a1
61+
; RV64IA-NEXT: slli a0, a0, 3
62+
; RV64IA-NEXT: sllw a2, a2, a0
63+
; RV64IA-NEXT: sllw a1, a1, a0
64+
; RV64IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
65+
; RV64IA-NEXT: lr.w.aqrl a4, (a3)
66+
; RV64IA-NEXT: add a5, a4, a1
67+
; RV64IA-NEXT: xor a5, a4, a5
68+
; RV64IA-NEXT: and a5, a5, a2
69+
; RV64IA-NEXT: xor a5, a4, a5
70+
; RV64IA-NEXT: sc.w.rl a5, a5, (a3)
71+
; RV64IA-NEXT: bnez a5, .LBB0_1
72+
; RV64IA-NEXT: # %bb.2:
73+
; RV64IA-NEXT: srlw a0, a4, a0
74+
; RV64IA-NEXT: ret
75+
;
76+
; RV64-LABEL: atomicrmw_add_i8:
77+
; RV64: # %bb.0:
78+
; RV64-NEXT: addi sp, sp, -16
79+
; RV64-NEXT: .cfi_def_cfa_offset 16
80+
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
81+
; RV64-NEXT: .cfi_offset ra, -8
82+
; RV64-NEXT: li a2, 5
83+
; RV64-NEXT: call __atomic_fetch_add_1
84+
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
85+
; RV64-NEXT: .cfi_restore ra
86+
; RV64-NEXT: addi sp, sp, 16
87+
; RV64-NEXT: .cfi_def_cfa_offset 0
88+
; RV64-NEXT: ret
89+
%res = atomicrmw add ptr %ptr, i8 %rhs seq_cst
90+
ret i8 %res
91+
}
92+
93+
define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
94+
; RV32IA-ZABHA-LABEL: atomicrmw_add_i16:
95+
; RV32IA-ZABHA: # %bb.0:
96+
; RV32IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
97+
; RV32IA-ZABHA-NEXT: ret
98+
;
99+
; RV32IA-LABEL: atomicrmw_add_i16:
100+
; RV32IA: # %bb.0:
101+
; RV32IA-NEXT: lui a2, 16
102+
; RV32IA-NEXT: andi a3, a0, -4
103+
; RV32IA-NEXT: andi a0, a0, 3
104+
; RV32IA-NEXT: addi a2, a2, -1
105+
; RV32IA-NEXT: slli a0, a0, 3
106+
; RV32IA-NEXT: sll a4, a2, a0
107+
; RV32IA-NEXT: and a1, a1, a2
108+
; RV32IA-NEXT: sll a1, a1, a0
109+
; RV32IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
110+
; RV32IA-NEXT: lr.w.aqrl a2, (a3)
111+
; RV32IA-NEXT: add a5, a2, a1
112+
; RV32IA-NEXT: xor a5, a2, a5
113+
; RV32IA-NEXT: and a5, a5, a4
114+
; RV32IA-NEXT: xor a5, a2, a5
115+
; RV32IA-NEXT: sc.w.rl a5, a5, (a3)
116+
; RV32IA-NEXT: bnez a5, .LBB1_1
117+
; RV32IA-NEXT: # %bb.2:
118+
; RV32IA-NEXT: srl a0, a2, a0
119+
; RV32IA-NEXT: ret
120+
;
121+
; RV32-LABEL: atomicrmw_add_i16:
122+
; RV32: # %bb.0:
123+
; RV32-NEXT: addi sp, sp, -16
124+
; RV32-NEXT: .cfi_def_cfa_offset 16
125+
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
126+
; RV32-NEXT: .cfi_offset ra, -4
127+
; RV32-NEXT: li a2, 5
128+
; RV32-NEXT: call __atomic_fetch_add_2
129+
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
130+
; RV32-NEXT: .cfi_restore ra
131+
; RV32-NEXT: addi sp, sp, 16
132+
; RV32-NEXT: .cfi_def_cfa_offset 0
133+
; RV32-NEXT: ret
134+
;
135+
; RV64IA-ZABHA-LABEL: atomicrmw_add_i16:
136+
; RV64IA-ZABHA: # %bb.0:
137+
; RV64IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
138+
; RV64IA-ZABHA-NEXT: ret
139+
;
140+
; RV64IA-LABEL: atomicrmw_add_i16:
141+
; RV64IA: # %bb.0:
142+
; RV64IA-NEXT: lui a2, 16
143+
; RV64IA-NEXT: andi a3, a0, -4
144+
; RV64IA-NEXT: andi a0, a0, 3
145+
; RV64IA-NEXT: addi a2, a2, -1
146+
; RV64IA-NEXT: slli a0, a0, 3
147+
; RV64IA-NEXT: sllw a4, a2, a0
148+
; RV64IA-NEXT: and a1, a1, a2
149+
; RV64IA-NEXT: sllw a1, a1, a0
150+
; RV64IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
151+
; RV64IA-NEXT: lr.w.aqrl a2, (a3)
152+
; RV64IA-NEXT: add a5, a2, a1
153+
; RV64IA-NEXT: xor a5, a2, a5
154+
; RV64IA-NEXT: and a5, a5, a4
155+
; RV64IA-NEXT: xor a5, a2, a5
156+
; RV64IA-NEXT: sc.w.rl a5, a5, (a3)
157+
; RV64IA-NEXT: bnez a5, .LBB1_1
158+
; RV64IA-NEXT: # %bb.2:
159+
; RV64IA-NEXT: srlw a0, a2, a0
160+
; RV64IA-NEXT: ret
161+
;
162+
; RV64-LABEL: atomicrmw_add_i16:
163+
; RV64: # %bb.0:
164+
; RV64-NEXT: addi sp, sp, -16
165+
; RV64-NEXT: .cfi_def_cfa_offset 16
166+
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
167+
; RV64-NEXT: .cfi_offset ra, -8
168+
; RV64-NEXT: li a2, 5
169+
; RV64-NEXT: call __atomic_fetch_add_2
170+
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
171+
; RV64-NEXT: .cfi_restore ra
172+
; RV64-NEXT: addi sp, sp, 16
173+
; RV64-NEXT: .cfi_def_cfa_offset 0
174+
; RV64-NEXT: ret
175+
%res = atomicrmw add ptr %ptr, i16 %rhs seq_cst
176+
ret i16 %res
177+
}
178+
179+
define i32 @atomicrmw_add_i32(ptr %ptr, i32 %rhs) {
180+
; RV32IA-ZABHA-LABEL: atomicrmw_add_i32:
181+
; RV32IA-ZABHA: # %bb.0:
182+
; RV32IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
183+
; RV32IA-ZABHA-NEXT: ret
184+
;
185+
; RV32IA-LABEL: atomicrmw_add_i32:
186+
; RV32IA: # %bb.0:
187+
; RV32IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
188+
; RV32IA-NEXT: ret
189+
;
190+
; RV32-LABEL: atomicrmw_add_i32:
191+
; RV32: # %bb.0:
192+
; RV32-NEXT: addi sp, sp, -16
193+
; RV32-NEXT: .cfi_def_cfa_offset 16
194+
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
195+
; RV32-NEXT: .cfi_offset ra, -4
196+
; RV32-NEXT: li a2, 5
197+
; RV32-NEXT: call __atomic_fetch_add_4
198+
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
199+
; RV32-NEXT: .cfi_restore ra
200+
; RV32-NEXT: addi sp, sp, 16
201+
; RV32-NEXT: .cfi_def_cfa_offset 0
202+
; RV32-NEXT: ret
203+
;
204+
; RV64IA-ZABHA-LABEL: atomicrmw_add_i32:
205+
; RV64IA-ZABHA: # %bb.0:
206+
; RV64IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
207+
; RV64IA-ZABHA-NEXT: ret
208+
;
209+
; RV64IA-LABEL: atomicrmw_add_i32:
210+
; RV64IA: # %bb.0:
211+
; RV64IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
212+
; RV64IA-NEXT: ret
213+
;
214+
; RV64-LABEL: atomicrmw_add_i32:
215+
; RV64: # %bb.0:
216+
; RV64-NEXT: addi sp, sp, -16
217+
; RV64-NEXT: .cfi_def_cfa_offset 16
218+
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
219+
; RV64-NEXT: .cfi_offset ra, -8
220+
; RV64-NEXT: li a2, 5
221+
; RV64-NEXT: call __atomic_fetch_add_4
222+
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
223+
; RV64-NEXT: .cfi_restore ra
224+
; RV64-NEXT: addi sp, sp, 16
225+
; RV64-NEXT: .cfi_def_cfa_offset 0
226+
; RV64-NEXT: ret
227+
%res = atomicrmw add ptr %ptr, i32 %rhs seq_cst
228+
ret i32 %res
229+
}
230+
231+
define i64 @atomicrmw_add_i64(ptr %ptr, i64 %rhs) {
232+
; RV32IA-ZABHA-LABEL: atomicrmw_add_i64:
233+
; RV32IA-ZABHA: # %bb.0:
234+
; RV32IA-ZABHA-NEXT: addi sp, sp, -16
235+
; RV32IA-ZABHA-NEXT: .cfi_def_cfa_offset 16
236+
; RV32IA-ZABHA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
237+
; RV32IA-ZABHA-NEXT: .cfi_offset ra, -4
238+
; RV32IA-ZABHA-NEXT: li a3, 5
239+
; RV32IA-ZABHA-NEXT: call __atomic_fetch_add_8
240+
; RV32IA-ZABHA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
241+
; RV32IA-ZABHA-NEXT: .cfi_restore ra
242+
; RV32IA-ZABHA-NEXT: addi sp, sp, 16
243+
; RV32IA-ZABHA-NEXT: .cfi_def_cfa_offset 0
244+
; RV32IA-ZABHA-NEXT: ret
245+
;
246+
; RV32IA-LABEL: atomicrmw_add_i64:
247+
; RV32IA: # %bb.0:
248+
; RV32IA-NEXT: addi sp, sp, -16
249+
; RV32IA-NEXT: .cfi_def_cfa_offset 16
250+
; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
251+
; RV32IA-NEXT: .cfi_offset ra, -4
252+
; RV32IA-NEXT: li a3, 5
253+
; RV32IA-NEXT: call __atomic_fetch_add_8
254+
; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
255+
; RV32IA-NEXT: .cfi_restore ra
256+
; RV32IA-NEXT: addi sp, sp, 16
257+
; RV32IA-NEXT: .cfi_def_cfa_offset 0
258+
; RV32IA-NEXT: ret
259+
;
260+
; RV32-LABEL: atomicrmw_add_i64:
261+
; RV32: # %bb.0:
262+
; RV32-NEXT: addi sp, sp, -16
263+
; RV32-NEXT: .cfi_def_cfa_offset 16
264+
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
265+
; RV32-NEXT: .cfi_offset ra, -4
266+
; RV32-NEXT: li a3, 5
267+
; RV32-NEXT: call __atomic_fetch_add_8
268+
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
269+
; RV32-NEXT: .cfi_restore ra
270+
; RV32-NEXT: addi sp, sp, 16
271+
; RV32-NEXT: .cfi_def_cfa_offset 0
272+
; RV32-NEXT: ret
273+
;
274+
; RV64IA-ZABHA-LABEL: atomicrmw_add_i64:
275+
; RV64IA-ZABHA: # %bb.0:
276+
; RV64IA-ZABHA-NEXT: amoadd.d.aqrl a0, a1, (a0)
277+
; RV64IA-ZABHA-NEXT: ret
278+
;
279+
; RV64IA-LABEL: atomicrmw_add_i64:
280+
; RV64IA: # %bb.0:
281+
; RV64IA-NEXT: amoadd.d.aqrl a0, a1, (a0)
282+
; RV64IA-NEXT: ret
283+
;
284+
; RV64-LABEL: atomicrmw_add_i64:
285+
; RV64: # %bb.0:
286+
; RV64-NEXT: addi sp, sp, -16
287+
; RV64-NEXT: .cfi_def_cfa_offset 16
288+
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
289+
; RV64-NEXT: .cfi_offset ra, -8
290+
; RV64-NEXT: li a2, 5
291+
; RV64-NEXT: call __atomic_fetch_add_8
292+
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
293+
; RV64-NEXT: .cfi_restore ra
294+
; RV64-NEXT: addi sp, sp, 16
295+
; RV64-NEXT: .cfi_def_cfa_offset 0
296+
; RV64-NEXT: ret
297+
%res = atomicrmw add ptr %ptr, i64 %rhs seq_cst
298+
ret i64 %res
299+
}

0 commit comments

Comments (0)