diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 8c0a046d3a7e9..d0badb292647b 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -11244,8 +11244,8 @@ If the ``load`` is marked as ``atomic``, it takes an extra :ref:`ordering ` and optional ``syncscope("")`` argument. The ``release`` and ``acq_rel`` orderings are not valid on ``load`` instructions. Atomic loads produce :ref:`defined ` results when they may see -multiple atomic stores. The type of the pointee must be an integer, pointer, or -floating-point type whose bit width is a power of two greater than or equal to +multiple atomic stores. The type of the pointee must be an integer, pointer, +floating-point, or vector type whose bit width is a power of two greater than or equal to eight and less than or equal to a target-specific size limit. ``align`` must be explicitly specified on atomic loads. Note: if the alignment is not greater or equal to the size of the `` type, the atomic operation is likely to @@ -11385,8 +11385,8 @@ If the ``store`` is marked as ``atomic``, it takes an extra :ref:`ordering ` and optional ``syncscope("")`` argument. The ``acquire`` and ``acq_rel`` orderings aren't valid on ``store`` instructions. Atomic loads produce :ref:`defined ` results when they may see -multiple atomic stores. The type of the pointee must be an integer, pointer, or -floating-point type whose bit width is a power of two greater than or equal to +multiple atomic stores. The type of the pointee must be an integer, pointer, +floating-point, or vector type whose bit width is a power of two greater than or equal to eight and less than or equal to a target-specific size limit. ``align`` must be explicitly specified on atomic stores. 
Note: if the alignment is not greater or equal to the size of the `` type, the atomic operation is likely to diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 10efc889fcc7f..56f2c92311f24 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -65,6 +65,7 @@ Changes to the LLVM IR removed: * `mul` +* `load atomic` and `store atomic` may now be used with vector types. * Updated semantics of `llvm.type.checked.load.relative` to match that of `llvm.load.relative`. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index dd9af47da5287..d24b4517a460d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -879,6 +879,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4d844f0036a75..d6cbf2211f053 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -65,6 +65,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: + R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N)); + break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -455,6 +458,18 @@ SDValue
DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomicLoad( + ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index fdb4ddaafbbcc..68d42cfadd169 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4323,9 +4323,10 @@ void Verifier::visitLoadInst(LoadInst &LI) { Check(LI.getOrdering() != AtomicOrdering::Release && LI.getOrdering() != AtomicOrdering::AcquireRelease, "Load cannot have Release ordering", &LI); - Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(), - "atomic load operand must have integer, pointer, or floating point " - "type!", + Check(ElTy->getScalarType()->isIntOrPtrTy() || + ElTy->getScalarType()->isFloatingPointTy(), + "atomic load operand must have integer, pointer, floating point, " + "or vector type!", ElTy, &LI); checkAtomicMemAccessSize(ElTy, &LI); } else { @@ -4349,9 +4350,10 @@ void Verifier::visitStoreInst(StoreInst &SI) { Check(SI.getOrdering() != AtomicOrdering::Acquire && SI.getOrdering() != AtomicOrdering::AcquireRelease, "Store cannot have Acquire ordering", &SI); - Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(), - "atomic store operand must have integer, pointer, or floating point " - "type!", + Check(ElTy->getScalarType()->isIntOrPtrTy() || + ElTy->getScalarType()->isFloatingPointTy(), + "atomic store operand must have integer, pointer, floating point, " + "or vector type!", ElTy, &SI); 
checkAtomicMemAccessSize(ElTy, &SI); } else { diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b1a3e3c006bb3..776d3c0a42e2f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2653,6 +2653,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, diff --git a/llvm/test/Assembler/atomic.ll b/llvm/test/Assembler/atomic.ll index 39f33f9fdcacb..6609edc2953cc 100644 --- a/llvm/test/Assembler/atomic.ll +++ b/llvm/test/Assembler/atomic.ll @@ -52,6 +52,25 @@ define void @f(ptr %x) { ; CHECK: atomicrmw volatile usub_sat ptr %x, i32 10 syncscope("agent") monotonic atomicrmw volatile usub_sat ptr %x, i32 10 syncscope("agent") monotonic + ; CHECK: load atomic <1 x i32>, ptr %x unordered, align 4 + load atomic <1 x i32>, ptr %x unordered, align 4 + ; CHECK: store atomic <1 x i32> splat (i32 3), ptr %x release, align 4 + store atomic <1 x i32> <i32 3>, ptr %x release, align 4 + ; CHECK: load atomic <2 x i32>, ptr %x unordered, align 4 + load atomic <2 x i32>, ptr %x unordered, align 4 + ; CHECK: store atomic <2 x i32> , ptr %x release, align 4 + store atomic <2 x i32> , ptr %x release, align 4 + + ; CHECK: load atomic <2 x ptr>, ptr %x unordered, align 4 + load atomic <2 x ptr>, ptr %x unordered, align 4 + ; CHECK: store atomic <2 x ptr> zeroinitializer, ptr %x release, align 4 + store atomic <2 x ptr> zeroinitializer, ptr %x release, align 4 + + ; CHECK: load atomic <2 x float>, ptr %x unordered, align 4 + load atomic <2 x float>, ptr %x unordered, align 4 + ; CHECK: store atomic
<2 x float> , ptr %x release, align 4 + store atomic <2 x float> , ptr %x release, align 4 + ; CHECK: fence syncscope("singlethread") release fence syncscope("singlethread") release ; CHECK: fence seq_cst diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 45277ce3d26c4..9fab8b98b4af0 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O3 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O3 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O3 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O3 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-SSE-O3 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3 ; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O0 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O0 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O0 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O0 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s 
--check-prefixes=CHECK,CHECK-SSE-O0 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -34,6 +34,355 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK-O0: {{.*}} -; CHECK-O3: {{.*}} + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: retq + %ret = load atomic <1 x i32>, ptr %x acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK-O3-LABEL: atomic_vec1_i8: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movzbl (%rdi), %eax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_i8: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movzbl (%rdi), %eax +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_i8: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movzbl (%rdi), %eax +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_i8: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movb (%rdi), %al +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_i8: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movb (%rdi), %al +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_i8: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movb (%rdi), %al +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK-O3-LABEL: atomic_vec1_i16: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movzwl (%rdi), %eax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_i16: +; CHECK-SSE-O3: # %bb.0: +; 
CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_i16: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_i16: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movw (%rdi), %ax +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_i16: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movw (%rdi), %ax +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_i16: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movw (%rdi), %ax +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { +; CHECK-O3-LABEL: atomic_vec1_i8_zext: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movzbl (%rdi), %eax +; CHECK-O3-NEXT: movzbl %al, %eax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_i8_zext: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movzbl (%rdi), %eax +; CHECK-SSE-O3-NEXT: movzbl %al, %eax +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_i8_zext: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movzbl (%rdi), %eax +; CHECK-AVX-O3-NEXT: movzbl %al, %eax +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_i8_zext: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movb (%rdi), %al +; CHECK-O0-NEXT: movzbl %al, %eax +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_i8_zext: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movb (%rdi), %al +; CHECK-SSE-O0-NEXT: movzbl %al, %eax +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_i8_zext: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movb (%rdi), %al +; CHECK-AVX-O0-NEXT: movzbl %al, %eax +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + %zret = zext <1 x i8> %ret to <1 x i32> + ret <1 x i32> %zret +} + +define <1 x i64> @atomic_vec1_i16_sext(ptr %x) { +; CHECK-O3-LABEL: atomic_vec1_i16_sext: +; CHECK-O3: # %bb.0: +; 
CHECK-O3-NEXT: movzwl (%rdi), %eax +; CHECK-O3-NEXT: movswq %ax, %rax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_i16_sext: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax +; CHECK-SSE-O3-NEXT: movswq %ax, %rax +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_i16_sext: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax +; CHECK-AVX-O3-NEXT: movswq %ax, %rax +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_i16_sext: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movw (%rdi), %ax +; CHECK-O0-NEXT: movswq %ax, %rax +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_i16_sext: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movw (%rdi), %ax +; CHECK-SSE-O0-NEXT: movswq %ax, %rax +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_i16_sext: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movw (%rdi), %ax +; CHECK-AVX-O0-NEXT: movswq %ax, %rax +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + %sret = sext <1 x i16> %ret to <1 x i64> + ret <1 x i64> %sret +} + +define <1 x ptr addrspace(270)> @atomic_vec1_ptr270(ptr %x) { +; CHECK-LABEL: atomic_vec1_ptr270: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: retq + %ret = load atomic <1 x ptr addrspace(270)>, ptr %x acquire, align 4 + ret <1 x ptr addrspace(270)> %ret +} + +define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) { +; CHECK-O3-LABEL: atomic_vec1_bfloat: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movzwl (%rdi), %eax +; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax +; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax +; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: 
atomic_vec1_bfloat: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movw (%rdi), %cx +; CHECK-O0-NEXT: # implicit-def: $eax +; CHECK-O0-NEXT: movw %cx, %ax +; CHECK-O0-NEXT: # implicit-def: $xmm0 +; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movw (%rdi), %cx +; CHECK-SSE-O0-NEXT: # implicit-def: $eax +; CHECK-SSE-O0-NEXT: movw %cx, %ax +; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0 +; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movw (%rdi), %cx +; CHECK-AVX-O0-NEXT: # implicit-def: $eax +; CHECK-AVX-O0-NEXT: movw %cx, %ax +; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0 +; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x bfloat>, ptr %x acquire, align 2 + ret <1 x bfloat> %ret +} + +define <1 x ptr> @atomic_vec1_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_ptr_align: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 8 + ret <1 x ptr> %ret +} + +define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_i64_align: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 8 + ret <1 x i64> %ret +} + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK-O3-LABEL: atomic_vec1_half: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movzwl (%rdi), %eax +; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_half: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax +; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_half: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax +; CHECK-AVX-O3-NEXT: vpinsrw 
$0, %eax, %xmm0, %xmm0 +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_half: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movw (%rdi), %cx +; CHECK-O0-NEXT: # implicit-def: $eax +; CHECK-O0-NEXT: movw %cx, %ax +; CHECK-O0-NEXT: # implicit-def: $xmm0 +; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_half: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movw (%rdi), %cx +; CHECK-SSE-O0-NEXT: # implicit-def: $eax +; CHECK-SSE-O0-NEXT: movw %cx, %ax +; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0 +; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_half: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movw (%rdi), %cx +; CHECK-AVX-O0-NEXT: # implicit-def: $eax +; CHECK-AVX-O0-NEXT: movw %cx, %ax +; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0 +; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x half>, ptr %x acquire, align 2 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-O3-LABEL: atomic_vec1_float: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_float: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_float: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_float: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_float: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_float: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; 
CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec1_double_align: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_double_align: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_double_align: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_double_align: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_double_align: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_double_align: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x double>, ptr %x acquire, align 8 + ret <1 x double> %ret +} diff --git a/llvm/test/Verifier/atomics.ll b/llvm/test/Verifier/atomics.ll index f835b98b24345..17bf5a0528d73 100644 --- a/llvm/test/Verifier/atomics.ll +++ b/llvm/test/Verifier/atomics.ll @@ -1,14 +1,15 @@ ; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s +; CHECK: atomic store operand must have integer, pointer, floating point, or vector type! +; CHECK: atomic load operand must have integer, pointer, floating point, or vector type! -; CHECK: atomic store operand must have integer, pointer, or floating point type! -; CHECK: atomic load operand must have integer, pointer, or floating point type! 
+%ty = type { i32 }; -define void @foo(ptr %P, <1 x i64> %v) { - store atomic <1 x i64> %v, ptr %P unordered, align 8 +define void @foo(ptr %P, %ty %v) { + store atomic %ty %v, ptr %P unordered, align 8 ret void } -define <1 x i64> @bar(ptr %P) { - %v = load atomic <1 x i64>, ptr %P unordered, align 8 - ret <1 x i64> %v +define %ty @bar(ptr %P) { + %v = load atomic %ty, ptr %P unordered, align 8 + ret %ty %v }