-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[X86] Cast atomic vectors in IR to support floats #148899
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/jofrn/gt/07-15-_selectiondag_split_vector_types_for_atomic_load
Are you sure you want to change the base?
Conversation
|
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: None (jofrn) ChangesThis commit casts floats to ints in an atomic load during AtomicExpand to support Full diff: https://github.com/llvm/llvm-project/pull/148899.diff 3 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 776d3c0a42e2f..3debf30da0a29 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32070,6 +32070,13 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
}
}
+TargetLowering::AtomicExpansionKind
+X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
+ if (LI->getType()->getScalarType()->isFloatingPointTy())
+ return AtomicExpansionKind::CastToInteger;
+ return AtomicExpansionKind::None;
+}
+
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 5cb6b3e493a32..43cddb2b53bd6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1839,6 +1839,8 @@ namespace llvm {
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldCastAtomicLoadInIR(LoadInst *LI) const override;
void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 4b818b6cfa57e..039edcbf83544 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -207,19 +207,19 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-O3-LABEL: atomic_vec1_bfloat:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O3-NEXT: movd %eax, %xmm0
; CHECK-O3-NEXT: retq
;
; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-O0-LABEL: atomic_vec1_bfloat:
@@ -227,8 +227,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-O0-NEXT: movw (%rdi), %cx
; CHECK-O0-NEXT: # implicit-def: $eax
; CHECK-O0-NEXT: movw %cx, %ax
-; CHECK-O0-NEXT: # implicit-def: $xmm0
-; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O0-NEXT: movd %eax, %xmm0
; CHECK-O0-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat:
@@ -236,8 +235,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
; CHECK-SSE-O0-NEXT: # implicit-def: $eax
; CHECK-SSE-O0-NEXT: movw %cx, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat:
@@ -245,8 +243,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
; CHECK-AVX-O0-NEXT: # implicit-def: $eax
; CHECK-AVX-O0-NEXT: movw %cx, %ax
-; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
-; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <1 x bfloat>, ptr %x acquire, align 2
ret <1 x bfloat> %ret
@@ -377,6 +374,74 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
ret <2 x float> %ret
}
+define <2 x half> @atomic_vec2_half(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_half:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_half:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_half:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec2_half:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_half:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_half:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <2 x half>, ptr %x acquire, align 4
+ ret <2 x half> %ret
+}
+
+define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_bfloat:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_bfloat:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_bfloat:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec2_bfloat:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_bfloat:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_bfloat:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
+ ret <2 x bfloat> %ret
+}
+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
; CHECK-O3-LABEL: atomic_vec1_ptr:
; CHECK-O3: # %bb.0:
@@ -457,19 +522,19 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-O3-LABEL: atomic_vec1_half:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O3-NEXT: movd %eax, %xmm0
; CHECK-O3-NEXT: retq
;
; CHECK-SSE-O3-LABEL: atomic_vec1_half:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec1_half:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-O0-LABEL: atomic_vec1_half:
@@ -477,8 +542,7 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-O0-NEXT: movw (%rdi), %cx
; CHECK-O0-NEXT: # implicit-def: $eax
; CHECK-O0-NEXT: movw %cx, %ax
-; CHECK-O0-NEXT: # implicit-def: $xmm0
-; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O0-NEXT: movd %eax, %xmm0
; CHECK-O0-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec1_half:
@@ -486,8 +550,7 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
; CHECK-SSE-O0-NEXT: # implicit-def: $eax
; CHECK-SSE-O0-NEXT: movw %cx, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec1_half:
@@ -495,8 +558,7 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
; CHECK-AVX-O0-NEXT: # implicit-def: $eax
; CHECK-AVX-O0-NEXT: movw %cx, %ax
-; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
-; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <1 x half>, ptr %x acquire, align 2
ret <1 x half> %ret
@@ -841,6 +903,89 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
ret <4 x i16> %ret
}
+define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_half:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %ret = load atomic <4 x half>, ptr %x acquire, align 8
+ ret <4 x half> %ret
+}
+
+define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_bfloat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %ret = load atomic <4 x bfloat>, ptr %x acquire, align 8
+ ret <4 x bfloat> %ret
+}
+
+define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec4_float_align:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: pushq %rax
+; CHECK-O3-NEXT: movl $2, %esi
+; CHECK-O3-NEXT: callq __atomic_load_16@PLT
+; CHECK-O3-NEXT: movq %rdx, %xmm1
+; CHECK-O3-NEXT: movq %rax, %xmm0
+; CHECK-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-O3-NEXT: popq %rax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec4_float_align:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: pushq %rbx
+; CHECK-SSE-O3-NEXT: xorl %eax, %eax
+; CHECK-SSE-O3-NEXT: xorl %edx, %edx
+; CHECK-SSE-O3-NEXT: xorl %ecx, %ecx
+; CHECK-SSE-O3-NEXT: xorl %ebx, %ebx
+; CHECK-SSE-O3-NEXT: lock cmpxchg16b (%rdi)
+; CHECK-SSE-O3-NEXT: movq %rdx, %xmm1
+; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE-O3-NEXT: popq %rbx
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec4_float_align:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec4_float_align:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: pushq %rax
+; CHECK-O0-NEXT: movl $2, %esi
+; CHECK-O0-NEXT: callq __atomic_load_16@PLT
+; CHECK-O0-NEXT: movq %rdx, %xmm1
+; CHECK-O0-NEXT: movq %rax, %xmm0
+; CHECK-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-O0-NEXT: popq %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec4_float_align:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: pushq %rbx
+; CHECK-SSE-O0-NEXT: xorl %eax, %eax
+; CHECK-SSE-O0-NEXT: movl %eax, %ebx
+; CHECK-SSE-O0-NEXT: movq %rbx, %rax
+; CHECK-SSE-O0-NEXT: movq %rbx, %rdx
+; CHECK-SSE-O0-NEXT: movq %rbx, %rcx
+; CHECK-SSE-O0-NEXT: lock cmpxchg16b (%rdi)
+; CHECK-SSE-O0-NEXT: movq %rdx, %xmm1
+; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE-O0-NEXT: popq %rbx
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec4_float_align:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <4 x float>, ptr %x acquire, align 16
+ ret <4 x float> %ret
+}
+
define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
; CHECK-O3-LABEL: atomic_vec4_float:
; CHECK-O3: # %bb.0:
|
b8727ee to
ff8979e
Compare
b30786f to
23b20f1
Compare
ff8979e to
aab7a4e
Compare
23b20f1 to
038ab35
Compare
aab7a4e to
74eb40e
Compare
038ab35 to
1f63bc7
Compare
74eb40e to
fc3ec7b
Compare
02b47f7 to
c5418c9
Compare
fc3ec7b to
15e8347
Compare
c5418c9 to
d81b453
Compare
15e8347 to
28b4f4a
Compare
d81b453 to
daca511
Compare
28b4f4a to
1bcf1f1
Compare
daca511 to
cce2b9d
Compare
1bcf1f1 to
98b88dc
Compare
cce2b9d to
02c0d96
Compare
98b88dc to
40f8e4e
Compare
40f8e4e to
8cf6fc8
Compare
a5c0410 to
804de61
Compare
4e9876d to
226394d
Compare
804de61 to
612099a
Compare
612099a to
ab47398
Compare
81c146e to
a0038c0
Compare
ab47398 to
edc1e28
Compare
a0038c0 to
5c2428c
Compare
edc1e28 to
35e2752
Compare
5c2428c to
9411926
Compare
4d1cdad to
f9b99b9
Compare
9411926 to
1434bcf
Compare
This commit casts floats to ints in an atomic load during AtomicExpand to support floating point types. It also is required to support 128 bit vectors in SSE/AVX.
1434bcf to
8466578
Compare
f9b99b9 to
23fb928
Compare

This commit casts floats to ints in an atomic load during AtomicExpand to support
floating point types. It also is required to support 128 bit vectors in SSE/AVX.