-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[SelectionDAG] Widen <2 x T> vector types for atomic load #148897
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[SelectionDAG] Widen <2 x T> vector types for atomic load #148897
Conversation
|
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: None (jofrn) Changes: Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. Full diff: https://github.com/llvm/llvm-project/pull/148897.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d24b4517a460d..b6e018ba0e454 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1068,6 +1068,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index d6cbf2211f053..42763aab5bb55 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4622,6 +4622,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::ATOMIC_LOAD:
+ Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+ break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::STEP_VECTOR:
case ISD::SPLAT_VECTOR:
@@ -6003,6 +6006,74 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
N->getOperand(1), N->getOperand(2));
}
+/// Either return the same load or provide appropriate casts
+/// from the load and return that.
+static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT,
+                                 TypeSize LdWidth, TypeSize FirstVTWidth,
+                                 SDLoc dl, SelectionDAG &DAG) {
+  assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth) &&
+         "Loaded value must fit inside the first memory type");
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  if (!FirstVT.isVector()) {
+    // Scalar load: place it in element 0 of a vector of FirstVT elements,
+    // then bitcast that vector to the widened result type.
+    unsigned NumElts =
+        WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+    EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
+    SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+    return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+  }
+  // Vector load: no coercion is performed here, so the memory type must
+  // already match the widened result type exactly.
+  assert(FirstVT == WidenVT && "Memory VT must match the widened VT");
+  return LdOp;
+}
+
+static std::optional<EVT> findMemType(SelectionDAG &DAG,
+ const TargetLowering &TLI, unsigned Width,
+ EVT WidenVT, unsigned Align,
+ unsigned WidenEx);
+
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  EVT WidenVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+  EVT LdVT = LD->getMemoryVT();
+  SDLoc dl(LD);
+  assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors");
+  assert(LdVT.isScalableVector() == WidenVT.isScalableVector() &&
+         "Must have the same scalability");
+  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() &&
+         "Expected equivalent element types");
+
+  // Load information.
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+
+  TypeSize LdWidth = LdVT.getSizeInBits();
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  TypeSize WidthDiff = WidenWidth - LdWidth;
+
+  // Find a memory type we can load from. Unlike the non-atomic path, the
+  // load must not be split, so we only proceed if a single type covers it.
+  std::optional<EVT> FirstVT =
+      findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0,
+                  WidthDiff.getKnownMinValue());
+
+  if (!FirstVT)
+    return SDValue();
+
+  TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+  // Load the whole value with a single atomic load instruction.
+  SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT,
+                                   Chain, BasePtr, LD->getMemOperand());
+
+  // Coerce the loaded value (scalar or vector) into the widened result type.
+  SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth,
+                                     FirstVTWidth, dl, DAG);
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new one.
+  ReplaceValueWith(SDValue(LD, 1), LdOp.getValue(1));
+  return Result;
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -7894,29 +7965,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction.
- if (MemVTs.empty()) {
- assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
- if (!FirstVT->isVector()) {
- unsigned NumElts =
- WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
- EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
- SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
- return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
- }
- if (FirstVT == WidenVT)
- return LdOp;
-
- // TODO: We don't currently have any tests that exercise this code path.
- assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
- unsigned NumConcat =
- WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
- SmallVector<SDValue, 16> ConcatOps(NumConcat);
- SDValue UndefVal = DAG.getUNDEF(*FirstVT);
- ConcatOps[0] = LdOp;
- for (unsigned i = 1; i != NumConcat; ++i)
- ConcatOps[i] = UndefVal;
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
- }
+ if (MemVTs.empty())
+ return coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth, dl,
+ DAG);
// Load vector by using multiple loads from largest vector to scalar.
SmallVector<SDValue, 16> LdOps;
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 3e7b73a65fe07..ff5391f44bbe3 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -270,6 +270,212 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
ret <1 x i64> %ret
}
+define <2 x i8> @atomic_vec2_i8(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_i8:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-O3-NEXT: movd %eax, %xmm0
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_i8:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_i8:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec2_i8:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movw (%rdi), %cx
+; CHECK-O0-NEXT: # implicit-def: $eax
+; CHECK-O0-NEXT: movw %cx, %ax
+; CHECK-O0-NEXT: movd %eax, %xmm0
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_i8:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
+; CHECK-SSE-O0-NEXT: # implicit-def: $eax
+; CHECK-SSE-O0-NEXT: movw %cx, %ax
+; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_i8:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
+; CHECK-AVX-O0-NEXT: # implicit-def: $eax
+; CHECK-AVX-O0-NEXT: movw %cx, %ax
+; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <2 x i8>, ptr %x acquire, align 4
+ ret <2 x i8> %ret
+}
+
+define <2 x i16> @atomic_vec2_i16(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_i16:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movl (%rdi), %eax
+; CHECK-O3-NEXT: movd %eax, %xmm0
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_i16:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_i16:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec2_i16:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movl (%rdi), %eax
+; CHECK-O0-NEXT: movd %eax, %xmm0
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_i16:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movl (%rdi), %eax
+; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_i16:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movl (%rdi), %eax
+; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <2 x i16>, ptr %x acquire, align 4
+ ret <2 x i16> %ret
+}
+
+define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_ptr270:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq %rax, %xmm0
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_ptr270:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movq (%rdi), %rax
+; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_ptr270:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movq (%rdi), %rax
+; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec2_ptr270:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movq (%rdi), %rax
+; CHECK-O0-NEXT: movq %rax, %xmm0
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_ptr270:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movq (%rdi), %rax
+; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_ptr270:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movq (%rdi), %rax
+; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8
+ ret <2 x ptr addrspace(270)> %ret
+}
+
+define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_i32_align:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq %rax, %xmm0
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_i32_align:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movq (%rdi), %rax
+; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_i32_align:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movq (%rdi), %rax
+; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec2_i32_align:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movq (%rdi), %rax
+; CHECK-O0-NEXT: movq %rax, %xmm0
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_i32_align:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movq (%rdi), %rax
+; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_i32_align:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movq (%rdi), %rax
+; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <2 x i32>, ptr %x acquire, align 8
+ ret <2 x i32> %ret
+}
+
+define <2 x float> @atomic_vec2_float_align(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_float_align:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq %rax, %xmm0
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_float_align:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movq (%rdi), %rax
+; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_float_align:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movq (%rdi), %rax
+; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec2_float_align:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movq (%rdi), %rax
+; CHECK-O0-NEXT: movq %rax, %xmm0
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_float_align:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movq (%rdi), %rax
+; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_float_align:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movq (%rdi), %rax
+; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <2 x float>, ptr %x acquire, align 8
+ ret <2 x float> %ret
+}
+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
; CHECK-O3-LABEL: atomic_vec1_ptr:
; CHECK-O3: # %bb.0:
@@ -691,6 +897,86 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
ret <2 x i32> %ret
}
+define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec4_i8:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movl (%rdi), %eax
+; CHECK-O3-NEXT: movd %eax, %xmm0
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec4_i8:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec4_i8:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec4_i8:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movl (%rdi), %eax
+; CHECK-O0-NEXT: movd %eax, %xmm0
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec4_i8:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movl (%rdi), %eax
+; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec4_i8:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movl (%rdi), %eax
+; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <4 x i8>, ptr %x acquire, align 4
+ ret <4 x i8> %ret
+}
+
+define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec4_i16:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq %rax, %xmm0
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec4_i16:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movq (%rdi), %rax
+; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec4_i16:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movq (%rdi), %rax
+; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec4_i16:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movq (%rdi), %rax
+; CHECK-O0-NEXT: movq %rax, %xmm0
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec4_i16:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movq (%rdi), %rax
+; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec4_i16:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movq (%rdi), %rax
+; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <4 x i16>, ptr %x acquire, align 8
+ ret <4 x i16> %ret
+}
+
define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
; CHECK-O3-LABEL: atomic_vec4_float:
; CHECK-O3: # %bb.0:
|
27e1e69 to
e9ba6ed
Compare
73ea819 to
da4fe6c
Compare
e9ba6ed to
a162961
Compare
da4fe6c to
b382b4a
Compare
a162961 to
2f09512
Compare
c379447 to
a9d4257
Compare
206603c to
0f3bacd
Compare
3bb1a53 to
e7a4522
Compare
0f3bacd to
1ed917f
Compare
e7a4522 to
94d15c5
Compare
340c46b to
b49fc24
Compare
da000fe to
cff15ed
Compare
2bc70e4 to
e358aca
Compare
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with a couple of minors
| static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT, | ||
| TypeSize LdWidth, TypeSize FirstVTWidth, | ||
| SDLoc dl, SelectionDAG &DAG) { | ||
| assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) missing assert message
| SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); | ||
| return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); | ||
| } | ||
| assert(FirstVT == WidenVT); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) missing assert message
… improve reuse and fix missing AVX2/AVX512 checks. -mcpu=x86-64 still produces SSE codegen, and AVX2/AVX512 checks were missing where the common CHECK-AVX prefix clashed. Noticed while reviewing llvm#148897.
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
… improve reuse and fix missing AVX2/AVX512 checks (llvm#165552) -mcpu=x86-64 is still SSE codegen, and there were missing AVX2/AVX512 checks where the common CHECK-AVX prefix clashed Noticed while reviewing llvm#148897
e358aca to
bbc2714
Compare
… improve reuse and fix missing AVX2/AVX512 checks (llvm#165552) -mcpu=x86-64 is still SSE codegen, and there were missing AVX2/AVX512 checks where the common CHECK-AVX prefix clashed Noticed while reviewing llvm#148897
Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size.
bbc2714 to
353b435
Compare
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers

Vector types of 2 elements must be widened. This change does this
for vector types of atomic load in SelectionDAG
so that it can translate aligned vectors of >1 size.