
Conversation

Contributor

@jofrn jofrn commented Jul 15, 2025

Vector types of two elements must be widened. This change implements that widening for the vector types of atomic loads in SelectionDAG, so that it can translate aligned vectors with more than one element.
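For example, the widening lets SelectionDAG translate a two-element atomic load such as the one below, taken from the new tests in this patch; the <2 x i32> value is loaded with a single 64-bit atomic load and moved into a vector register:

define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
  %ret = load atomic <2 x i32>, ptr %x acquire, align 8
  ret <2 x i32> %ret
}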

Member

llvmbot commented Jul 15, 2025

@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-x86

Author: None (jofrn)

Changes

Vector types of two elements must be widened. This change implements that widening for the vector types of atomic loads in SelectionDAG, so that it can translate aligned vectors with more than one element.


Full diff: https://github.com/llvm/llvm-project/pull/148897.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (+1)
  • (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+74-23)
  • (modified) llvm/test/CodeGen/X86/atomic-load-store.ll (+286)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d24b4517a460d..b6e018ba0e454 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1068,6 +1068,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
   SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
   SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index d6cbf2211f053..42763aab5bb55 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4622,6 +4622,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+    Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+    break;
   case ISD::LOAD:              Res = WidenVecRes_LOAD(N); break;
   case ISD::STEP_VECTOR:
   case ISD::SPLAT_VECTOR:
@@ -6003,6 +6006,74 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
                      N->getOperand(1), N->getOperand(2));
 }
 
+/// Either return the same load or provide appropriate casts
+/// from the load and return that.
+static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT,
+                                 TypeSize LdWidth, TypeSize FirstVTWidth,
+                                 SDLoc dl, SelectionDAG &DAG) {
+  assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  if (!FirstVT.isVector()) {
+    unsigned NumElts =
+        WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+    EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
+    SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+    return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+  }
+  assert(FirstVT == WidenVT);
+  return LdOp;
+}
+
+static std::optional<EVT> findMemType(SelectionDAG &DAG,
+                                      const TargetLowering &TLI, unsigned Width,
+                                      EVT WidenVT, unsigned Align,
+                                      unsigned WidenEx);
+
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  EVT WidenVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+  EVT LdVT = LD->getMemoryVT();
+  SDLoc dl(LD);
+  assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors");
+  assert(LdVT.isScalableVector() == WidenVT.isScalableVector() &&
+         "Must be scalable");
+  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() &&
+         "Expected equivalent element types");
+
+  // Load information
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+  AAMDNodes AAInfo = LD->getAAInfo();
+
+  TypeSize LdWidth = LdVT.getSizeInBits();
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  TypeSize WidthDiff = WidenWidth - LdWidth;
+
+  // Find the vector type that can load from.
+  std::optional<EVT> FirstVT =
+      findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0,
+                  WidthDiff.getKnownMinValue());
+
+  if (!FirstVT)
+    return SDValue();
+
+  SmallVector<EVT, 8> MemVTs;
+  TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+  SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT,
+                                   Chain, BasePtr, LD->getMemOperand());
+
+  // Load the element with one instruction.
+  SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth,
+                                     FirstVTWidth, dl, DAG);
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new one.
+  ReplaceValueWith(SDValue(LD, 1), LdOp.getValue(1));
+  return Result;
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
   LoadSDNode *LD = cast<LoadSDNode>(N);
   ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -7894,29 +7965,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
   LdChain.push_back(LdOp.getValue(1));
 
   // Check if we can load the element with one instruction.
-  if (MemVTs.empty()) {
-    assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
-    if (!FirstVT->isVector()) {
-      unsigned NumElts =
-          WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
-      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
-      SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
-      return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
-    }
-    if (FirstVT == WidenVT)
-      return LdOp;
-
-    // TODO: We don't currently have any tests that exercise this code path.
-    assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
-    unsigned NumConcat =
-        WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
-    SmallVector<SDValue, 16> ConcatOps(NumConcat);
-    SDValue UndefVal = DAG.getUNDEF(*FirstVT);
-    ConcatOps[0] = LdOp;
-    for (unsigned i = 1; i != NumConcat; ++i)
-      ConcatOps[i] = UndefVal;
-    return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
-  }
+  if (MemVTs.empty())
+    return coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth, dl,
+                             DAG);
 
   // Load vector by using multiple loads from largest vector to scalar.
   SmallVector<SDValue, 16> LdOps;
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 3e7b73a65fe07..ff5391f44bbe3 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -270,6 +270,212 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
   ret <1 x i64> %ret
 }
 
+define <2 x i8> @atomic_vec2_i8(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_i8:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movzwl (%rdi), %eax
+; CHECK-O3-NEXT:    movd %eax, %xmm0
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_i8:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    movzwl (%rdi), %eax
+; CHECK-SSE-O3-NEXT:    movd %eax, %xmm0
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_i8:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    movzwl (%rdi), %eax
+; CHECK-AVX-O3-NEXT:    vmovd %eax, %xmm0
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: atomic_vec2_i8:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movw (%rdi), %cx
+; CHECK-O0-NEXT:    # implicit-def: $eax
+; CHECK-O0-NEXT:    movw %cx, %ax
+; CHECK-O0-NEXT:    movd %eax, %xmm0
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_i8:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    movw (%rdi), %cx
+; CHECK-SSE-O0-NEXT:    # implicit-def: $eax
+; CHECK-SSE-O0-NEXT:    movw %cx, %ax
+; CHECK-SSE-O0-NEXT:    movd %eax, %xmm0
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_i8:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    movw (%rdi), %cx
+; CHECK-AVX-O0-NEXT:    # implicit-def: $eax
+; CHECK-AVX-O0-NEXT:    movw %cx, %ax
+; CHECK-AVX-O0-NEXT:    vmovd %eax, %xmm0
+; CHECK-AVX-O0-NEXT:    retq
+  %ret = load atomic <2 x i8>, ptr %x acquire, align 4
+  ret <2 x i8> %ret
+}
+
+define <2 x i16> @atomic_vec2_i16(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_i16:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movl (%rdi), %eax
+; CHECK-O3-NEXT:    movd %eax, %xmm0
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_i16:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    movl (%rdi), %eax
+; CHECK-SSE-O3-NEXT:    movd %eax, %xmm0
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_i16:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    movl (%rdi), %eax
+; CHECK-AVX-O3-NEXT:    vmovd %eax, %xmm0
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: atomic_vec2_i16:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movl (%rdi), %eax
+; CHECK-O0-NEXT:    movd %eax, %xmm0
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_i16:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    movl (%rdi), %eax
+; CHECK-SSE-O0-NEXT:    movd %eax, %xmm0
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_i16:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    movl (%rdi), %eax
+; CHECK-AVX-O0-NEXT:    vmovd %eax, %xmm0
+; CHECK-AVX-O0-NEXT:    retq
+  %ret = load atomic <2 x i16>, ptr %x acquire, align 4
+  ret <2 x i16> %ret
+}
+
+define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_ptr270:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movq (%rdi), %rax
+; CHECK-O3-NEXT:    movq %rax, %xmm0
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_ptr270:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    movq (%rdi), %rax
+; CHECK-SSE-O3-NEXT:    movq %rax, %xmm0
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_ptr270:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    movq (%rdi), %rax
+; CHECK-AVX-O3-NEXT:    vmovq %rax, %xmm0
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: atomic_vec2_ptr270:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq (%rdi), %rax
+; CHECK-O0-NEXT:    movq %rax, %xmm0
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_ptr270:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    movq (%rdi), %rax
+; CHECK-SSE-O0-NEXT:    movq %rax, %xmm0
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_ptr270:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    movq (%rdi), %rax
+; CHECK-AVX-O0-NEXT:    vmovq %rax, %xmm0
+; CHECK-AVX-O0-NEXT:    retq
+  %ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8
+  ret <2 x ptr addrspace(270)> %ret
+}
+
+define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_i32_align:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movq (%rdi), %rax
+; CHECK-O3-NEXT:    movq %rax, %xmm0
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_i32_align:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    movq (%rdi), %rax
+; CHECK-SSE-O3-NEXT:    movq %rax, %xmm0
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_i32_align:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    movq (%rdi), %rax
+; CHECK-AVX-O3-NEXT:    vmovq %rax, %xmm0
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: atomic_vec2_i32_align:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq (%rdi), %rax
+; CHECK-O0-NEXT:    movq %rax, %xmm0
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_i32_align:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    movq (%rdi), %rax
+; CHECK-SSE-O0-NEXT:    movq %rax, %xmm0
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_i32_align:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    movq (%rdi), %rax
+; CHECK-AVX-O0-NEXT:    vmovq %rax, %xmm0
+; CHECK-AVX-O0-NEXT:    retq
+  %ret = load atomic <2 x i32>, ptr %x acquire, align 8
+  ret <2 x i32> %ret
+}
+
+define <2 x float> @atomic_vec2_float_align(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_float_align:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movq (%rdi), %rax
+; CHECK-O3-NEXT:    movq %rax, %xmm0
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_float_align:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    movq (%rdi), %rax
+; CHECK-SSE-O3-NEXT:    movq %rax, %xmm0
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_float_align:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    movq (%rdi), %rax
+; CHECK-AVX-O3-NEXT:    vmovq %rax, %xmm0
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: atomic_vec2_float_align:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq (%rdi), %rax
+; CHECK-O0-NEXT:    movq %rax, %xmm0
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_float_align:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    movq (%rdi), %rax
+; CHECK-SSE-O0-NEXT:    movq %rax, %xmm0
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_float_align:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    movq (%rdi), %rax
+; CHECK-AVX-O0-NEXT:    vmovq %rax, %xmm0
+; CHECK-AVX-O0-NEXT:    retq
+  %ret = load atomic <2 x float>, ptr %x acquire, align 8
+  ret <2 x float> %ret
+}
+
 define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
 ; CHECK-O3-LABEL: atomic_vec1_ptr:
 ; CHECK-O3:       # %bb.0:
@@ -691,6 +897,86 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
   ret <2 x i32> %ret
 }
 
+define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec4_i8:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movl (%rdi), %eax
+; CHECK-O3-NEXT:    movd %eax, %xmm0
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec4_i8:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    movl (%rdi), %eax
+; CHECK-SSE-O3-NEXT:    movd %eax, %xmm0
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec4_i8:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    movl (%rdi), %eax
+; CHECK-AVX-O3-NEXT:    vmovd %eax, %xmm0
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: atomic_vec4_i8:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movl (%rdi), %eax
+; CHECK-O0-NEXT:    movd %eax, %xmm0
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec4_i8:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    movl (%rdi), %eax
+; CHECK-SSE-O0-NEXT:    movd %eax, %xmm0
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec4_i8:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    movl (%rdi), %eax
+; CHECK-AVX-O0-NEXT:    vmovd %eax, %xmm0
+; CHECK-AVX-O0-NEXT:    retq
+  %ret = load atomic <4 x i8>, ptr %x acquire, align 4
+  ret <4 x i8> %ret
+}
+
+define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec4_i16:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movq (%rdi), %rax
+; CHECK-O3-NEXT:    movq %rax, %xmm0
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec4_i16:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    movq (%rdi), %rax
+; CHECK-SSE-O3-NEXT:    movq %rax, %xmm0
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec4_i16:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    movq (%rdi), %rax
+; CHECK-AVX-O3-NEXT:    vmovq %rax, %xmm0
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: atomic_vec4_i16:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq (%rdi), %rax
+; CHECK-O0-NEXT:    movq %rax, %xmm0
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec4_i16:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    movq (%rdi), %rax
+; CHECK-SSE-O0-NEXT:    movq %rax, %xmm0
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec4_i16:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    movq (%rdi), %rax
+; CHECK-AVX-O0-NEXT:    vmovq %rax, %xmm0
+; CHECK-AVX-O0-NEXT:    retq
+  %ret = load atomic <4 x i16>, ptr %x acquire, align 8
+  ret <4 x i16> %ret
+}
+
 define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
 ; CHECK-O3-LABEL: atomic_vec4_float:
 ; CHECK-O3:       # %bb.0:

@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch from 27e1e69 to e9ba6ed on July 16, 2025 13:46
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_selectiondag_widen_2_x_t_vector_types_for_atomic_load branch from 73ea819 to da4fe6c on July 16, 2025 13:46
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch from e9ba6ed to a162961 on October 15, 2025 19:19
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_selectiondag_widen_2_x_t_vector_types_for_atomic_load branch from da4fe6c to b382b4a on October 15, 2025 19:19
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch from a162961 to 2f09512 on October 16, 2025 15:29
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_selectiondag_widen_2_x_t_vector_types_for_atomic_load branch 2 times, most recently from c379447 to a9d4257 on October 20, 2025 21:13
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch 2 times, most recently from 206603c to 0f3bacd on October 21, 2025 10:42
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_selectiondag_widen_2_x_t_vector_types_for_atomic_load branch 2 times, most recently from 3bb1a53 to e7a4522 on October 21, 2025 15:37
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch from 0f3bacd to 1ed917f on October 21, 2025 15:37
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_selectiondag_widen_2_x_t_vector_types_for_atomic_load branch from e7a4522 to 94d15c5 on October 22, 2025 15:42
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch 4 times, most recently from 340c46b to b49fc24 on October 23, 2025 18:48
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch 6 times, most recently from da000fe to cff15ed on October 23, 2025 19:37
Base automatically changed from users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes to main October 23, 2025 20:11
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_selectiondag_widen_2_x_t_vector_types_for_atomic_load branch 2 times, most recently from 2bc70e4 to e358aca on October 29, 2025 05:19
Collaborator

@RKSimon RKSimon left a comment

LGTM with a couple of minors

static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT,
                                 TypeSize LdWidth, TypeSize FirstVTWidth,
                                 SDLoc dl, SelectionDAG &DAG) {
  assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
Collaborator

(style) missing assert message
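A minimal way to address this nit would be to attach a message to the assert; the wording below is illustrative, not taken from the patch:

  assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth) &&
         "Loaded width must not exceed the width of the memory type");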

    SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
    return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
  }
  assert(FirstVT == WidenVT);
Collaborator

(style) missing assert message

RKSimon added a commit to RKSimon/llvm-project that referenced this pull request Oct 29, 2025
… improve reuse and fix missing AVX2/AVX512 checks

-mcpu=x86-64 is still SSE codegen, and there were missing AVX2/AVX512 checks where the common CHECK-AVX prefix clashed

Noticed while reviewing llvm#148897
Collaborator

@RKSimon RKSimon left a comment

@jofrn please rebase after #165552 has landed

RKSimon added a commit that referenced this pull request Oct 29, 2025
… improve reuse and fix missing AVX2/AVX512 checks (#165552)

-mcpu=x86-64 is still SSE codegen, and there were missing AVX2/AVX512 checks where the common CHECK-AVX prefix clashed

Noticed while reviewing #148897
aokblast pushed a commit to aokblast/llvm-project that referenced this pull request Oct 30, 2025
… improve reuse and fix missing AVX2/AVX512 checks (llvm#165552)

-mcpu=x86-64 is still SSE codegen, and there were missing AVX2/AVX512 checks where the common CHECK-AVX prefix clashed

Noticed while reviewing llvm#148897
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_selectiondag_widen_2_x_t_vector_types_for_atomic_load branch from e358aca to bbc2714 on October 31, 2025 05:12
DEBADRIBASAK pushed a commit to DEBADRIBASAK/llvm-project that referenced this pull request Nov 3, 2025
… improve reuse and fix missing AVX2/AVX512 checks (llvm#165552)

-mcpu=x86-64 is still SSE codegen, and there were missing AVX2/AVX512 checks where the common CHECK-AVX prefix clashed

Noticed while reviewing llvm#148897
Vector types of two elements must be widened. This change implements that widening for the vector types of atomic loads in SelectionDAG, so that it can translate aligned vectors with more than one element.
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_selectiondag_widen_2_x_t_vector_types_for_atomic_load branch from bbc2714 to 353b435 on November 6, 2025 17:45
Collaborator

@RKSimon RKSimon left a comment

LGTM - cheers


Labels

backend:X86 llvm:SelectionDAG