Skip to content

Commit 3750df4

Browse files
committed
[AtomicExpand] Add bitcasts when expanding load atomic vector
AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds the appropriate bitcasts so that the call can be lowered. It also adds support for 128-bit lowering in TableGen to support SSE/AVX.
1 parent 35e2752 commit 3750df4

File tree

6 files changed

+254
-5
lines changed

6 files changed

+254
-5
lines changed

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1949,6 +1949,20 @@ def atomic_load_64 :
19491949
let MemoryVT = i64;
19501950
}
19511951

1952+
def atomic_load_128_v2i64 :
1953+
PatFrag<(ops node:$ptr),
1954+
(atomic_load node:$ptr)> {
1955+
let IsAtomic = true;
1956+
let MemoryVT = v2i64;
1957+
}
1958+
1959+
def atomic_load_128_v4i32 :
1960+
PatFrag<(ops node:$ptr),
1961+
(atomic_load node:$ptr)> {
1962+
let IsAtomic = true;
1963+
let MemoryVT = v4i32;
1964+
}
1965+
19521966
def atomic_load_nonext_8 :
19531967
PatFrag<(ops node:$ptr), (atomic_load_nonext node:$ptr)> {
19541968
let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,12 @@ LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
483483
NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
484484
LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
485485

486-
Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
486+
Value *NewVal =
487+
LI->getType()->isPointerTy() ||
488+
(LI->getType()->isVectorTy() &&
489+
cast<VectorType>(LI->getType())->getElementType()->isPointerTy())
490+
? Builder.CreateIntToPtr(NewLI, LI->getType())
491+
: Builder.CreateBitCast(NewLI, LI->getType());
487492
LI->replaceAllUsesWith(NewVal);
488493
LI->eraseFromParent();
489494
return NewLI;
@@ -2093,9 +2098,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
20932098
I->replaceAllUsesWith(V);
20942099
} else if (HasResult) {
20952100
Value *V;
2096-
if (UseSizedLibcall)
2097-
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2098-
else {
2101+
if (UseSizedLibcall) {
2102+
// Add bitcasts from Result's scalar type to I's <n x ptr> vector type
2103+
auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
2104+
auto *VTy = dyn_cast<VectorType>(I->getType());
2105+
if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
2106+
unsigned AS = PtrTy->getAddressSpace();
2107+
Value *BC = Builder.CreateBitCast(
2108+
Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
2109+
V = Builder.CreateIntToPtr(BC, I->getType());
2110+
} else
2111+
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2112+
} else {
20992113
V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
21002114
AllocaAlignment);
21012115
Builder.CreateLifetimeEnd(AllocaResult);

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1220,6 +1220,21 @@ def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
12201220
def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
12211221
(VMOV64toPQIZrm addr:$src)>, Requires<[HasAVX512]>;
12221222

1223+
// load atomic <2 x i64>
1224+
def : Pat<(v2i64 (atomic_load_128_v2i64 addr:$src)),
1225+
(MOVAPDrm addr:$src)>, Requires<[UseSSE2]>;
1226+
def : Pat<(v2i64 (atomic_load_128_v2i64 addr:$src)),
1227+
(VMOVAPDrm addr:$src)>, Requires<[UseAVX]>;
1228+
def : Pat<(v2i64 (atomic_load_128_v2i64 addr:$src)),
1229+
(VMOVAPDZ128rm addr:$src)>, Requires<[HasAVX512]>;
1230+
// load atomic <4 x i32>
1231+
def : Pat<(v4i32 (atomic_load_128_v4i32 addr:$src)),
1232+
(MOVAPDrm addr:$src)>, Requires<[UseSSE2]>;
1233+
def : Pat<(v4i32 (atomic_load_128_v4i32 addr:$src)),
1234+
(VMOVAPDrm addr:$src)>, Requires<[UseAVX]>;
1235+
def : Pat<(v4i32 (atomic_load_128_v4i32 addr:$src)),
1236+
(VMOVAPDZ128rm addr:$src)>, Requires<[HasAVX512]>;
1237+
12231238
// Floating point loads/stores.
12241239
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
12251240
(MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;

llvm/test/CodeGen/ARM/atomic-load-store.ll

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
983983
store atomic double %val1, ptr %ptr seq_cst, align 8
984984
ret void
985985
}
986+
987+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
988+
; ARM-LABEL: atomic_vec1_ptr:
989+
; ARM: @ %bb.0:
990+
; ARM-NEXT: ldr r0, [r0]
991+
; ARM-NEXT: dmb ish
992+
; ARM-NEXT: bx lr
993+
;
994+
; ARMOPTNONE-LABEL: atomic_vec1_ptr:
995+
; ARMOPTNONE: @ %bb.0:
996+
; ARMOPTNONE-NEXT: ldr r0, [r0]
997+
; ARMOPTNONE-NEXT: dmb ish
998+
; ARMOPTNONE-NEXT: bx lr
999+
;
1000+
; THUMBTWO-LABEL: atomic_vec1_ptr:
1001+
; THUMBTWO: @ %bb.0:
1002+
; THUMBTWO-NEXT: ldr r0, [r0]
1003+
; THUMBTWO-NEXT: dmb ish
1004+
; THUMBTWO-NEXT: bx lr
1005+
;
1006+
; THUMBONE-LABEL: atomic_vec1_ptr:
1007+
; THUMBONE: @ %bb.0:
1008+
; THUMBONE-NEXT: push {r7, lr}
1009+
; THUMBONE-NEXT: movs r1, #0
1010+
; THUMBONE-NEXT: mov r2, r1
1011+
; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4
1012+
; THUMBONE-NEXT: pop {r7, pc}
1013+
;
1014+
; ARMV4-LABEL: atomic_vec1_ptr:
1015+
; ARMV4: @ %bb.0:
1016+
; ARMV4-NEXT: push {r11, lr}
1017+
; ARMV4-NEXT: mov r1, #2
1018+
; ARMV4-NEXT: bl __atomic_load_4
1019+
; ARMV4-NEXT: pop {r11, lr}
1020+
; ARMV4-NEXT: mov pc, lr
1021+
;
1022+
; ARMV6-LABEL: atomic_vec1_ptr:
1023+
; ARMV6: @ %bb.0:
1024+
; ARMV6-NEXT: ldr r0, [r0]
1025+
; ARMV6-NEXT: mov r1, #0
1026+
; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
1027+
; ARMV6-NEXT: bx lr
1028+
;
1029+
; THUMBM-LABEL: atomic_vec1_ptr:
1030+
; THUMBM: @ %bb.0:
1031+
; THUMBM-NEXT: ldr r0, [r0]
1032+
; THUMBM-NEXT: dmb sy
1033+
; THUMBM-NEXT: bx lr
1034+
%ret = load atomic <1 x ptr>, ptr %x acquire, align 4
1035+
ret <1 x ptr> %ret
1036+
}

llvm/test/CodeGen/X86/atomic-load-store.ll

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,96 @@ define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) {
244244
%ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8
245245
ret <2 x ptr addrspace(270)> %ret
246246
}
247+
define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
248+
; CHECK-SSE2-O3-LABEL: atomic_vec2_ptr_align:
249+
; CHECK-SSE2-O3: # %bb.0:
250+
; CHECK-SSE2-O3-NEXT: pushq %rax
251+
; CHECK-SSE2-O3-NEXT: movl $2, %esi
252+
; CHECK-SSE2-O3-NEXT: callq __atomic_load_16@PLT
253+
; CHECK-SSE2-O3-NEXT: movq %rdx, %xmm1
254+
; CHECK-SSE2-O3-NEXT: movq %rax, %xmm0
255+
; CHECK-SSE2-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
256+
; CHECK-SSE2-O3-NEXT: popq %rax
257+
; CHECK-SSE2-O3-NEXT: retq
258+
;
259+
; CHECK-SSE4-O3-LABEL: atomic_vec2_ptr_align:
260+
; CHECK-SSE4-O3: # %bb.0:
261+
; CHECK-SSE4-O3-NEXT: movaps (%rdi), %xmm0
262+
; CHECK-SSE4-O3-NEXT: retq
263+
;
264+
; CHECK-AVX-O3-LABEL: atomic_vec2_ptr_align:
265+
; CHECK-AVX-O3: # %bb.0:
266+
; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0
267+
; CHECK-AVX-O3-NEXT: retq
268+
;
269+
; CHECK-SSE2-O0-LABEL: atomic_vec2_ptr_align:
270+
; CHECK-SSE2-O0: # %bb.0:
271+
; CHECK-SSE2-O0-NEXT: pushq %rax
272+
; CHECK-SSE2-O0-NEXT: movl $2, %esi
273+
; CHECK-SSE2-O0-NEXT: callq __atomic_load_16@PLT
274+
; CHECK-SSE2-O0-NEXT: movq %rdx, %xmm1
275+
; CHECK-SSE2-O0-NEXT: movq %rax, %xmm0
276+
; CHECK-SSE2-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
277+
; CHECK-SSE2-O0-NEXT: popq %rax
278+
; CHECK-SSE2-O0-NEXT: retq
279+
;
280+
; CHECK-SSE4-O0-LABEL: atomic_vec2_ptr_align:
281+
; CHECK-SSE4-O0: # %bb.0:
282+
; CHECK-SSE4-O0-NEXT: movapd (%rdi), %xmm0
283+
; CHECK-SSE4-O0-NEXT: retq
284+
;
285+
; CHECK-AVX-O0-LABEL: atomic_vec2_ptr_align:
286+
; CHECK-AVX-O0: # %bb.0:
287+
; CHECK-AVX-O0-NEXT: vmovapd (%rdi), %xmm0
288+
; CHECK-AVX-O0-NEXT: retq
289+
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
290+
ret <2 x ptr> %ret
291+
}
292+
define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind {
293+
; CHECK-SSE2-O3-LABEL: atomic_vec4_ptr270:
294+
; CHECK-SSE2-O3: # %bb.0:
295+
; CHECK-SSE2-O3-NEXT: pushq %rax
296+
; CHECK-SSE2-O3-NEXT: movl $2, %esi
297+
; CHECK-SSE2-O3-NEXT: callq __atomic_load_16@PLT
298+
; CHECK-SSE2-O3-NEXT: movq %rdx, %xmm1
299+
; CHECK-SSE2-O3-NEXT: movq %rax, %xmm0
300+
; CHECK-SSE2-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
301+
; CHECK-SSE2-O3-NEXT: popq %rax
302+
; CHECK-SSE2-O3-NEXT: retq
303+
;
304+
; CHECK-SSE4-O3-LABEL: atomic_vec4_ptr270:
305+
; CHECK-SSE4-O3: # %bb.0:
306+
; CHECK-SSE4-O3-NEXT: movaps (%rdi), %xmm0
307+
; CHECK-SSE4-O3-NEXT: retq
308+
;
309+
; CHECK-AVX-O3-LABEL: atomic_vec4_ptr270:
310+
; CHECK-AVX-O3: # %bb.0:
311+
; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0
312+
; CHECK-AVX-O3-NEXT: retq
313+
;
314+
; CHECK-SSE2-O0-LABEL: atomic_vec4_ptr270:
315+
; CHECK-SSE2-O0: # %bb.0:
316+
; CHECK-SSE2-O0-NEXT: pushq %rax
317+
; CHECK-SSE2-O0-NEXT: movl $2, %esi
318+
; CHECK-SSE2-O0-NEXT: callq __atomic_load_16@PLT
319+
; CHECK-SSE2-O0-NEXT: movq %rdx, %xmm1
320+
; CHECK-SSE2-O0-NEXT: movq %rax, %xmm0
321+
; CHECK-SSE2-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
322+
; CHECK-SSE2-O0-NEXT: popq %rax
323+
; CHECK-SSE2-O0-NEXT: retq
324+
;
325+
; CHECK-SSE4-O0-LABEL: atomic_vec4_ptr270:
326+
; CHECK-SSE4-O0: # %bb.0:
327+
; CHECK-SSE4-O0-NEXT: movapd (%rdi), %xmm0
328+
; CHECK-SSE4-O0-NEXT: retq
329+
;
330+
; CHECK-AVX-O0-LABEL: atomic_vec4_ptr270:
331+
; CHECK-AVX-O0: # %bb.0:
332+
; CHECK-AVX-O0-NEXT: vmovapd (%rdi), %xmm0
333+
; CHECK-AVX-O0-NEXT: retq
334+
%ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
335+
ret <4 x ptr addrspace(270)> %ret
336+
}
247337

248338
define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
249339
; CHECK-SSE-O3-LABEL: atomic_vec2_i32_align:
@@ -727,7 +817,6 @@ define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
727817
}
728818

729819
define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
730-
;
731820
; CHECK-SSE2-O3-LABEL: atomic_vec4_float_align:
732821
; CHECK-SSE2-O3: # %bb.0:
733822
; CHECK-SSE2-O3-NEXT: pushq %rax

llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,3 +189,69 @@ define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr, ptr addrspace(2) %v)
189189
ret void
190190
}
191191

192+
define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
193+
; CHECK-LABEL: define <2 x ptr> @atomic_vec2_ptr_align(
194+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] {
195+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
196+
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i128 [[TMP1]] to <2 x i64>
197+
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr>
198+
; CHECK-NEXT: ret <2 x ptr> [[TMP7]]
199+
;
200+
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
201+
ret <2 x ptr> %ret
202+
}
203+
204+
define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(ptr %x) nounwind {
205+
; CHECK-LABEL: define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(
206+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
207+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
208+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
209+
; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(270)>
210+
; CHECK-NEXT: ret <4 x ptr addrspace(270)> [[TMP3]]
211+
;
212+
%ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
213+
ret <4 x ptr addrspace(270)> %ret
214+
}
215+
216+
define <2 x i16> @atomic_vec2_i16(ptr %x) nounwind {
217+
; CHECK-LABEL: define <2 x i16> @atomic_vec2_i16(
218+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
219+
; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x i16>, ptr [[X]] acquire, align 8
220+
; CHECK-NEXT: ret <2 x i16> [[RET]]
221+
;
222+
%ret = load atomic <2 x i16>, ptr %x acquire, align 8
223+
ret <2 x i16> %ret
224+
}
225+
226+
define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
227+
; CHECK-LABEL: define <2 x half> @atomic_vec2_half(
228+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
229+
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[X]] acquire, align 8
230+
; CHECK-NEXT: [[RET:%.*]] = bitcast i32 [[TMP1]] to <2 x half>
231+
; CHECK-NEXT: ret <2 x half> [[RET]]
232+
;
233+
%ret = load atomic <2 x half>, ptr %x acquire, align 8
234+
ret <2 x half> %ret
235+
}
236+
237+
define <4 x i32> @atomic_vec4_i32(ptr %x) nounwind {
238+
; CHECK-LABEL: define <4 x i32> @atomic_vec4_i32(
239+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
240+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
241+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
242+
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
243+
;
244+
%ret = load atomic <4 x i32>, ptr %x acquire, align 16
245+
ret <4 x i32> %ret
246+
}
247+
248+
define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
249+
; CHECK-LABEL: define <4 x float> @atomic_vec4_float(
250+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
251+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
252+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x float>
253+
; CHECK-NEXT: ret <4 x float> [[TMP2]]
254+
;
255+
%ret = load atomic <4 x float>, ptr %x acquire, align 16
256+
ret <4 x float> %ret
257+
}

0 commit comments

Comments
 (0)