Skip to content
5 changes: 5 additions & 0 deletions llvm/include/llvm/IR/Instructions.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,11 @@ class LoadInst : public UnaryInstruction {
!isVolatile();
}

/// Returns true if \p Ty is acceptable as the loaded type of a load atomic
/// instruction with ordering \p AO, and false if creating such a load would
/// be invalid.
///
/// The out-of-line definition accepts only integer, pointer, and
/// floating-point types, rejects the Release and AcquireRelease orderings,
/// and requires the size of \p Ty in bits (as reported by \p DL) to be a
/// power of two that is at least 8.
///
/// \param Ty The type the atomic load would produce.
/// \param DL Data layout used to query the size of \p Ty in bits.
/// \param AO Ordering of the load; defaults to AtomicOrdering::NotAtomic.
static bool isValidAtomicTy(Type *Ty, const DataLayout &DL,
AtomicOrdering AO = AtomicOrdering::NotAtomic);

Value *getPointerOperand() { return getOperand(0); }
const Value *getPointerOperand() const { return getOperand(0); }
static unsigned getPointerOperandIndex() { return 0U; }
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/IR/Instructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1247,6 +1247,16 @@ void LoadInst::AssertOK() {
"Ptr must have pointer type.");
}

/// Returns true if \p Ty may be used as the result type of a load atomic
/// instruction with ordering \p AO; returns false otherwise.
///
/// Three conditions must all hold:
///  - \p Ty is an integer, pointer, or floating-point type;
///  - \p AO is neither Release nor AcquireRelease;
///  - the size of \p Ty in bits, per \p DL, is a power of two and >= 8.
bool LoadInst::isValidAtomicTy(Type *Ty, const DataLayout &DL,
AtomicOrdering AO) {
// Only integer, pointer, and floating-point types are accepted.
if (!Ty->isIntOrPtrTy() && !Ty->isFloatingPointTy())
return false;
// Release and AcquireRelease orderings are rejected for loads.
if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease)
return false;
// NOTE(review): the result of getTypeSizeInBits is implicitly converted to
// `unsigned` here -- confirm that narrowing is acceptable for all types that
// reach this point.
unsigned Size = DL.getTypeSizeInBits(Ty);
// Size must be at least one byte and have exactly one bit set. The `>= 8`
// test short-circuits, so `Size - 1` is never evaluated for Size == 0.
return Size >= 8 && !(Size & (Size - 1));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

basictest.ll fails due to checking DL here.

}

static Align computeLoadStoreDefaultAlign(Type *Ty, InsertPosition Pos) {
assert(Pos.isValid() &&
"Insertion position cannot be null when alignment not provided!");
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/Scalar/SROA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2853,6 +2853,12 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {

bool visitLoadInst(LoadInst &LI) {
LLVM_DEBUG(dbgs() << " original: " << LI << "\n");

// load atomic vector would be generated, which is illegal
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// load atomic vector would be generated, which is illegal
// Load atomic vector would be generated, which is illegal.

if (LI.isAtomic() &&
!LoadInst::isValidAtomicTy(NewAI.getAllocatedType(), DL))
return false;

Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);

Expand All @@ -2875,6 +2881,7 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
(canConvertValue(DL, NewAllocaTy, TargetTy) ||
(IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
TargetTy->isIntegerTy() && !LI.isVolatile()))) {

Value *NewPtr =
getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
Expand Down
53 changes: 53 additions & 0 deletions llvm/test/Transforms/SROA/atomic-vector.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes='sroa' -S | FileCheck %s

; A <1 x float> alloca is filled by memcpy and then read through a pointer
; that round-trips via %direct, using an atomic volatile load of type float.
; The expected output keeps the load typed as scalar `float` against the
; <1 x float> alloca, i.e. no atomic load of a vector type is produced.
define float @atomic_vector() {
; CHECK-LABEL: define float @atomic_vector() {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <1 x float>, align 4
; CHECK-NEXT: store <1 x float> undef, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load atomic volatile float, ptr [[TMP1]] acquire, align 4
; CHECK-NEXT: ret float [[TMP2]]
;
%src = alloca <1 x float>
%val = alloca <1 x float>
%direct = alloca ptr
call void @llvm.memcpy.p0.p0.i64(ptr %val, ptr %src, i64 4, i1 false)
store ptr %val, ptr %direct
%indirect = load ptr, ptr %direct
%ret = load atomic volatile float, ptr %indirect acquire, align 4
ret float %ret
}

; Integer variant: a <1 x i32> alloca is filled by memcpy and then read
; through a pointer that round-trips via %direct, using an atomic volatile
; i32 load. The expected output keeps the load typed as scalar `i32` against
; the <1 x i32> alloca.
define i32 @atomic_vector_int() {
; CHECK-LABEL: define i32 @atomic_vector_int() {
; CHECK-NEXT: [[VAL:%.*]] = alloca <1 x i32>, align 4
; CHECK-NEXT: store <1 x i32> undef, ptr [[VAL]], align 4
; CHECK-NEXT: [[RET:%.*]] = load atomic volatile i32, ptr [[VAL]] acquire, align 4
; CHECK-NEXT: ret i32 [[RET]]
;
%src = alloca <1 x i32>
%val = alloca <1 x i32>
%direct = alloca ptr
call void @llvm.memcpy.p0.p0.i64(ptr %val, ptr %src, i64 4, i1 false)
store ptr %val, ptr %direct
%indirect = load ptr, ptr %direct
%ret = load atomic volatile i32, ptr %indirect acquire, align 4
ret i32 %ret
}

; Pointer variant: a <1 x ptr> alloca is filled by an 8-byte memcpy and then
; read through a pointer that round-trips via %direct, using an atomic
; volatile ptr load. The expected output keeps the load typed as scalar `ptr`
; against the <1 x ptr> alloca.
; NOTE(review): the atomic ptr load is written with `align 4` while the
; alloca is expected to be `align 8` -- confirm this alignment is intended.
define ptr @atomic_vector_ptr() {
; CHECK-LABEL: define ptr @atomic_vector_ptr() {
; CHECK-NEXT: [[VAL_SROA_0:%.*]] = alloca <1 x ptr>, align 8
; CHECK-NEXT: store <1 x ptr> undef, ptr [[VAL_SROA_0]], align 8
; CHECK-NEXT: [[VAL_SROA_0_0_VAL_SROA_0_0_RET:%.*]] = load atomic volatile ptr, ptr [[VAL_SROA_0]] acquire, align 4
; CHECK-NEXT: ret ptr [[VAL_SROA_0_0_VAL_SROA_0_0_RET]]
;
%src = alloca <1 x ptr>
%val = alloca <1 x ptr>
%direct = alloca ptr
call void @llvm.memcpy.p0.p0.i64(ptr %val, ptr %src, i64 8, i1 false)
store ptr %val, ptr %direct
%indirect = load ptr, ptr %direct
%ret = load atomic volatile ptr, ptr %indirect acquire, align 4
ret ptr %ret
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test a <2 x i16> or some other real vector; a single-element (<1 x T>) vector is a degenerate case.

15 changes: 8 additions & 7 deletions llvm/test/Transforms/SROA/basictest.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1332,10 +1332,10 @@ define void @PR15674(ptr %data, ptr %src, i32 %size) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP_SROA_0:%.*]] = alloca i32, align 4
; CHECK-NEXT: switch i32 [[SIZE:%.*]], label [[END:%.*]] [
; CHECK-NEXT: i32 4, label [[BB4:%.*]]
; CHECK-NEXT: i32 3, label [[BB3:%.*]]
; CHECK-NEXT: i32 2, label [[BB2:%.*]]
; CHECK-NEXT: i32 1, label [[BB1:%.*]]
; CHECK-NEXT: i32 4, label [[BB4:%.*]]
; CHECK-NEXT: i32 3, label [[BB3:%.*]]
; CHECK-NEXT: i32 2, label [[BB2:%.*]]
; CHECK-NEXT: i32 1, label [[BB1:%.*]]
; CHECK-NEXT: ]
; CHECK: bb4:
; CHECK-NEXT: [[SRC_GEP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i32 3
Expand Down Expand Up @@ -1971,7 +1971,7 @@ bb7:
define i32 @load_atomic_volatile_past_end() {
; CHECK-LABEL: @load_atomic_volatile_past_end(
; CHECK-NEXT: [[A:%.*]] = alloca i1, align 1
; CHECK-NEXT: [[A_0_V:%.*]] = load atomic volatile i32, ptr [[A]] seq_cst, align 1
; CHECK-NEXT: [[A_0_V:%.*]] = load atomic volatile i32, ptr [[A]] seq_cst, align 4
; CHECK-NEXT: ret i32 [[A_0_V]]
;
%a = alloca i1, align 1
Expand All @@ -1992,8 +1992,9 @@ define i32 @load_volatile_past_end() {

define i32 @load_atomic_past_end() {
; CHECK-LABEL: @load_atomic_past_end(
; CHECK-NEXT: [[A_0_LOAD_EXT:%.*]] = zext i1 undef to i32
; CHECK-NEXT: ret i32 [[A_0_LOAD_EXT]]
; CHECK-NEXT: [[A:%.*]] = alloca i1, align 1
; CHECK-NEXT: [[V:%.*]] = load atomic i32, ptr [[A]] seq_cst, align 4
; CHECK-NEXT: ret i32 [[V]]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because of this test, we may want to check only the type, without consulting the DataLayout. Adding too many checks hinders optimization.

;
%a = alloca i1, align 1
%v = load atomic i32, ptr %a seq_cst, align 4
Expand Down
Loading