[AMDGPU] Insert casts in PromoteAlloca. #124547
Conversation
There may be casts between values of type <n x ptr0> and <m x ptr1> where m != n, that is, vectors with a different number of elements even though the sizes match up. This is not valid unless appropriate bitcasts are inserted in between. For example, to cast between %in = <4 x ptr addrspace(5)> and %out = <2 x ptr addrspace(1)>, one must do:

%i128 = bitcast <4 x i32> %in to i128
%ivec = bitcast i128 %i128 to <2 x i64>
%out = inttoptr <2 x i64> %ivec to <2 x ptr addrspace(1)>
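Spelled out end to end (the example above starts from a value already converted to <4 x i32>), the full chain the pass emits, including the initial ptrtoint, matches what the new tests check; the value names here are illustrative:

%iv = ptrtoint <4 x ptr addrspace(5)> %in to <4 x i32>
%i128 = bitcast <4 x i32> %iv to i128
%ivec = bitcast i128 %i128 to <2 x i64>
%out = inttoptr <2 x i64> %ivec to <2 x ptr addrspace(1)>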
@llvm/pr-subscribers-backend-amdgpu

Author: None (jofrn)

Changes

There may be casts between values of type <n x ptr0> and <m x ptr1> where m != n, that is, vectors with a different number of elements even though the sizes match up. This is not valid unless appropriate bitcasts are inserted in between. For example, to cast between %in = <4 x ptr addrspace(5)> and %out = <2 x ptr addrspace(1)>, one must do:

%i128 = bitcast <4 x i32> %in to i128
%ivec = bitcast i128 %i128 to <2 x i64>
%out = inttoptr <2 x i64> %ivec to <2 x ptr addrspace(1)>

Full diff: https://github.com/llvm/llvm-project/pull/124547.diff

2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index e27ef71c1c0883..0082e8f4856fea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -470,19 +470,68 @@ static Value *promoteAllocaUserToVector(
return Dummy;
};
- const auto CreateTempPtrIntCast = [&Builder, DL](Value *Val,
- Type *PtrTy) -> Value * {
+ const auto CreateCastBetweenUnequalNumVecElems = [&Builder, DL, Inst]
+ (Value *Val, Type *ResultTy) -> Value * {
+ // Can already cast between vectors of integers.
+ if (isa<IntegerType>(Val->getType()->getScalarType()) &&
+ isa<IntegerType>(ResultTy->getScalarType()))
+ return Builder.CreateBitOrPointerCast(Val, ResultTy);
+
+ // Insert casts between vectors/scalars of an unequal number of elements.
+ FixedVectorType *ValVTy = dyn_cast<FixedVectorType>(Val->getType());
+ FixedVectorType *ResultVTy = dyn_cast<FixedVectorType>(ResultTy);
+ if (isa<PointerType>(Val->getType()->getScalarType())) {
+ Type *IntTy;
+ if (ValVTy) {
+ Type *IntElemTy = Builder.getIntNTy(
+ DL.getTypeAllocSizeInBits(ValVTy->getScalarType()));
+ IntTy = FixedVectorType::get(IntElemTy, ValVTy->getNumElements());
+ } else
+ IntTy = IntegerType::get(Inst->getParent()->getParent()->getContext(),
+ DL.getTypeAllocSizeInBits(Val->getType()));
+ // Insert ptrtoint if casting to <m x ptr> or if Val is a ptr.
+ const bool IsToScalar = !ResultVTy;
+ const bool IsToVector = ResultVTy->getNumElements() !=
+ ValVTy->getNumElements();
+ if (IsToScalar || IsToVector)
+ Val = Builder.CreatePtrToInt(Val, IntTy);
+ }
+
+ const bool IsScalarToVector = ResultVTy && !ValVTy;
+ const bool IsVectorToVector = ResultVTy &&
+ ValVTy->getNumElements() != ResultVTy->getNumElements();
+ if (IsScalarToVector || IsVectorToVector) {
+ Type *IntTy = Builder.getIntNTy(
+ DL.getTypeAllocSizeInBits(Val->getType()));
+ // Insert bitcast to cast from integer, iM, to vector, <m x iN>.
+ Val = Builder.CreateBitCast(Val, IntTy);
+ // If result is a ptr, insert bitcast from <m x iN> to <n x ptr>.
+ if (isa<PointerType>(ResultVTy->getScalarType())) {
+ FixedVectorType *VectorIntTy =
+ FixedVectorType::get(Builder.getIntNTy(
+ DL.getTypeAllocSizeInBits(ResultVTy->getScalarType())),
+ ResultVTy->getNumElements());
+ Val = Builder.CreateBitCast(Val, VectorIntTy);
+ }
+ }
+ return Builder.CreateBitOrPointerCast(Val, ResultTy);
+ };
+
+ const auto CreateTempPtrIntCast = [&Builder, DL,
+ CreateCastBetweenUnequalNumVecElems]
+ (Value *Val, Type *PtrTy) -> Value * {
assert(DL.getTypeStoreSize(Val->getType()) == DL.getTypeStoreSize(PtrTy));
const unsigned Size = DL.getTypeStoreSizeInBits(PtrTy);
- if (!PtrTy->isVectorTy())
- return Builder.CreateBitOrPointerCast(Val, Builder.getIntNTy(Size));
+ if (!PtrTy->isVectorTy()) {
+ return CreateCastBetweenUnequalNumVecElems(Val, Builder.getIntNTy(Size));
+ }
const unsigned NumPtrElts = cast<FixedVectorType>(PtrTy)->getNumElements();
// If we want to cast to cast, e.g. a <2 x ptr> into a <4 x i32>, we need to
// first cast the ptr vector to <2 x i64>.
assert((Size % NumPtrElts == 0) && "Vector size not divisble");
Type *EltTy = Builder.getIntNTy(Size / NumPtrElts);
- return Builder.CreateBitOrPointerCast(
- Val, FixedVectorType::get(EltTy, NumPtrElts));
+ FixedVectorType *ResultVTy = FixedVectorType::get(EltTy, NumPtrElts);
+ return CreateCastBetweenUnequalNumVecElems(Val, ResultVTy);
};
Type *VecEltTy = VectorTy->getElementType();
@@ -564,7 +613,7 @@ static Value *promoteAllocaUserToVector(
Val = CreateTempPtrIntCast(Val, AccessTy);
else if (VectorTy->isPtrOrPtrVectorTy())
Val = CreateTempPtrIntCast(Val, VectorTy);
- return Builder.CreateBitOrPointerCast(Val, VectorTy);
+ return CreateCastBetweenUnequalNumVecElems(Val, VectorTy);
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll
index 7c5410004ed5b7..f93c6db3c2712b 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll
@@ -327,6 +327,172 @@ entry:
ret void
}
+define <2 x ptr addrspace(1)> @test_subvector_ptralloca_8(<2 x ptr addrspace(1)> %val) {
+; CHECK-LABEL: define <2 x ptr addrspace(1)> @test_subvector_ptralloca_8
+; CHECK-SAME: (<2 x ptr addrspace(1)> [[VAL:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(1)> [[VAL]] to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(5)>
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x ptr addrspace(5)> undef, ptr addrspace(5) [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP3]], i64 1
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP5]], ptr addrspace(5) [[TMP6]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP3]], i64 2
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP7]], ptr addrspace(5) [[TMP8]], i32 2
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP3]], i64 3
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP9]], ptr addrspace(5) [[TMP10]], i32 3
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x ptr addrspace(5)> poison, ptr addrspace(5) [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP12]], ptr addrspace(5) [[TMP6]], i64 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP13]], ptr addrspace(5) [[TMP8]], i64 2
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP14]], ptr addrspace(5) [[TMP10]], i64 3
+; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint <4 x ptr addrspace(5)> [[TMP15]] to <4 x i32>
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i32> [[TMP16]] to i128
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i128 [[TMP17]] to <2 x i64>
+; CHECK-NEXT: [[TMP19:%.*]] = inttoptr <2 x i64> [[TMP18]] to <2 x ptr addrspace(1)>
+; CHECK-NEXT: ret <2 x ptr addrspace(1)> [[TMP19]]
+;
+entry:
+ %stack = alloca [8 x ptr addrspace(5)], align 4, addrspace(5)
+ store <2 x ptr addrspace(1)> %val, ptr addrspace(5) %stack
+ %L = load <2 x ptr addrspace(1)>, ptr addrspace(5) %stack, align 16
+ ret <2 x ptr addrspace(1)> %L
+}
+
+define <2 x ptr addrspace(1)> @test_subvector_ptralloca_4(<2 x ptr addrspace(1)> %val) {
+; CHECK-LABEL: define <2 x ptr addrspace(1)> @test_subvector_ptralloca_4
+; CHECK-SAME: (<2 x ptr addrspace(1)> [[VAL:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(1)> [[VAL]] to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(5)>
+; CHECK-NEXT: ret <2 x ptr addrspace(1)> [[VAL]]
+;
+entry:
+ %stack = alloca [4 x ptr addrspace(5)], align 4, addrspace(5)
+ store <2 x ptr addrspace(1)> %val, ptr addrspace(5) %stack
+ %L = load <2 x ptr addrspace(1)>, ptr addrspace(5) %stack, align 16
+ ret <2 x ptr addrspace(1)> %L
+}
+
+define <2 x ptr addrspace(1)> @test_vector_ptralloca_2_3to1(<2 x ptr addrspace(1)> %val) {
+; CHECK-LABEL: define <2 x ptr addrspace(1)> @test_vector_ptralloca_2_3to1
+; CHECK-SAME: (<2 x ptr addrspace(1)> [[VAL:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(1)> [[VAL]] to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(3)>
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr addrspace(3)> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x ptr addrspace(3)> undef, ptr addrspace(3) [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr addrspace(3)> [[TMP3]], i64 1
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr addrspace(3)> [[TMP5]], ptr addrspace(3) [[TMP6]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x ptr addrspace(3)> poison, ptr addrspace(3) [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x ptr addrspace(3)> [[TMP8]], ptr addrspace(3) [[TMP6]], i64 1
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint <4 x ptr addrspace(3)> [[TMP9]] to <4 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to i128
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i128 [[TMP11]] to <2 x i64>
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr <2 x i64> [[TMP12]] to <2 x ptr addrspace(1)>
+; CHECK-NEXT: ret <2 x ptr addrspace(1)> [[TMP13]]
+;
+entry:
+ %stack = alloca [2 x ptr addrspace(3)], align 4, addrspace(3)
+ store <2 x ptr addrspace(1)> %val, ptr addrspace(3) %stack
+ %L = load <2 x ptr addrspace(1)>, ptr addrspace(3) %stack, align 16
+ ret <2 x ptr addrspace(1)> %L
+}
+
+define <2 x ptr addrspace(5)> @test_subvector_ptralloca_2_1to5(<2 x ptr addrspace(5)> %val) {
+; CHECK-LABEL: define <2 x ptr addrspace(5)> @test_subvector_ptralloca_2_1to5
+; CHECK-SAME: (<2 x ptr addrspace(5)> [[VAL:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(5)> [[VAL]] to <2 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <1 x i64> [[TMP2]] to <1 x ptr addrspace(1)>
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <1 x ptr addrspace(1)> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x ptr addrspace(1)> undef, ptr addrspace(1) [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <1 x ptr addrspace(1)> poison, ptr addrspace(1) [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint <1 x ptr addrspace(1)> [[TMP6]] to <1 x i64>
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP8]] to <2 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = inttoptr <2 x i32> [[TMP9]] to <2 x ptr addrspace(5)>
+; CHECK-NEXT: ret <2 x ptr addrspace(5)> [[TMP10]]
+;
+entry:
+ %stack = alloca [8 x ptr addrspace(1)], align 4, addrspace(1)
+ store <2 x ptr addrspace(5)> %val, ptr addrspace(1) %stack
+ %L = load <2 x ptr addrspace(5)>, ptr addrspace(1) %stack, align 16
+ ret <2 x ptr addrspace(5)> %L
+}
+
+define <2 x ptr addrspace(270)> @test_subvector_ptralloca_8_3to270(<2 x ptr addrspace(270)> %val) {
+; CHECK-LABEL: define <2 x ptr addrspace(270)> @test_subvector_ptralloca_8_3to270
+; CHECK-SAME: (<2 x ptr addrspace(270)> [[VAL:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(270)> [[VAL]] to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(3)>
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr addrspace(3)> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x ptr addrspace(3)> undef, ptr addrspace(3) [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr addrspace(3)> [[TMP3]], i64 1
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x ptr addrspace(3)> [[TMP5]], ptr addrspace(3) [[TMP6]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x ptr addrspace(3)> [[TMP3]], i64 2
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x ptr addrspace(3)> [[TMP7]], ptr addrspace(3) [[TMP8]], i32 2
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x ptr addrspace(3)> [[TMP3]], i64 3
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x ptr addrspace(3)> [[TMP9]], ptr addrspace(3) [[TMP10]], i32 3
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x ptr addrspace(3)> poison, ptr addrspace(3) [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x ptr addrspace(3)> [[TMP12]], ptr addrspace(3) [[TMP6]], i64 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x ptr addrspace(3)> [[TMP13]], ptr addrspace(3) [[TMP8]], i64 2
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x ptr addrspace(3)> [[TMP14]], ptr addrspace(3) [[TMP10]], i64 3
+; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint <4 x ptr addrspace(3)> [[TMP15]] to <4 x i32>
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i32> [[TMP16]] to i128
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i128 [[TMP17]] to <2 x i64>
+; CHECK-NEXT: [[TMP19:%.*]] = inttoptr <2 x i64> [[TMP18]] to <2 x ptr addrspace(270)>
+; CHECK-NEXT: ret <2 x ptr addrspace(270)> [[TMP19]]
+;
+entry:
+ %stack = alloca [8 x ptr addrspace(3)], align 4, addrspace(3)
+ store <2 x ptr addrspace(270)> %val, ptr addrspace(3) %stack
+ %L = load <2 x ptr addrspace(270)>, ptr addrspace(3) %stack, align 16
+ ret <2 x ptr addrspace(270)> %L
+}
+
+define ptr @test_subvector_ptralloca_2_scalar(ptr %val) {
+; CHECK-LABEL: define ptr @test_subvector_ptralloca_2_scalar
+; CHECK-SAME: (ptr [[VAL:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[VAL]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = inttoptr <2 x i32> [[TMP1]] to <2 x ptr addrspace(3)>
+; CHECK-NEXT: ret ptr [[VAL]]
+;
+entry:
+ %stack = alloca <2 x ptr addrspace(3)>, align 8, addrspace(3)
+ store ptr %val, ptr addrspace(3) %stack
+ %L = load ptr, ptr addrspace(3) %stack, align 8
+ ret ptr %L
+}
+
+define ptr @test_subvector_ptralloca_1_scalar(ptr %val) {
+; CHECK-LABEL: define ptr @test_subvector_ptralloca_1_scalar
+; CHECK-SAME: (ptr [[VAL:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[STACK:%.*]] = alloca <1 x ptr addrspace(3)>, align 8, addrspace(3)
+; CHECK-NEXT: store ptr [[VAL]], ptr addrspace(3) [[STACK]], align 8
+; CHECK-NEXT: [[L:%.*]] = load ptr, ptr addrspace(3) [[STACK]], align 8
+; CHECK-NEXT: ret ptr [[L]]
+;
+entry:
+ %stack = alloca <1 x ptr addrspace(3)>, align 8, addrspace(3)
+ store ptr %val, ptr addrspace(3) %stack
+ %L = load ptr, ptr addrspace(3) %stack, align 8
+ ret ptr %L
+}
+
define void @test_out_of_bounds_subvec(<2 x i64> %val) {
; CHECK-LABEL: define void @test_out_of_bounds_subvec
; CHECK-SAME: (<2 x i64> [[VAL:%.*]]) {
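A note on reproducing these results: the CHECK lines in this test follow the llvm/utils/update_test_checks.py format, so they can be regenerated with that script. The test's RUN line is not visible in this diff; for a PromoteAlloca test like this one it presumably looks roughly like the following (triple and exact flags are assumptions):

; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca %s | FileCheck %s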
You can test this locally with the following command:

git-clang-format --diff 62a25a4c7c3b291f0805894926419f1a64cd83f8 37bbba8572d21f2607d5692aa8e1d039c210ea49 --extensions cpp -- llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

View the diff from clang-format here.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 0082e8f485..4c524901e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -470,8 +470,8 @@ static Value *promoteAllocaUserToVector(
return Dummy;
};
- const auto CreateCastBetweenUnequalNumVecElems = [&Builder, DL, Inst]
- (Value *Val, Type *ResultTy) -> Value * {
+ const auto CreateCastBetweenUnequalNumVecElems =
+ [&Builder, DL, Inst](Value *Val, Type *ResultTy) -> Value * {
// Can already cast between vectors of integers.
if (isa<IntegerType>(Val->getType()->getScalarType()) &&
isa<IntegerType>(ResultTy->getScalarType()))
@@ -491,26 +491,26 @@ static Value *promoteAllocaUserToVector(
DL.getTypeAllocSizeInBits(Val->getType()));
// Insert ptrtoint if casting to <m x ptr> or if Val is a ptr.
const bool IsToScalar = !ResultVTy;
- const bool IsToVector = ResultVTy->getNumElements() !=
- ValVTy->getNumElements();
+ const bool IsToVector =
+ ResultVTy->getNumElements() != ValVTy->getNumElements();
if (IsToScalar || IsToVector)
Val = Builder.CreatePtrToInt(Val, IntTy);
}
const bool IsScalarToVector = ResultVTy && !ValVTy;
- const bool IsVectorToVector = ResultVTy &&
- ValVTy->getNumElements() != ResultVTy->getNumElements();
+ const bool IsVectorToVector =
+ ResultVTy && ValVTy->getNumElements() != ResultVTy->getNumElements();
if (IsScalarToVector || IsVectorToVector) {
- Type *IntTy = Builder.getIntNTy(
- DL.getTypeAllocSizeInBits(Val->getType()));
+ Type *IntTy =
+ Builder.getIntNTy(DL.getTypeAllocSizeInBits(Val->getType()));
// Insert bitcast to cast from integer, iM, to vector, <m x iN>.
Val = Builder.CreateBitCast(Val, IntTy);
// If result is a ptr, insert bitcast from <m x iN> to <n x ptr>.
if (isa<PointerType>(ResultVTy->getScalarType())) {
FixedVectorType *VectorIntTy =
- FixedVectorType::get(Builder.getIntNTy(
- DL.getTypeAllocSizeInBits(ResultVTy->getScalarType())),
- ResultVTy->getNumElements());
+ FixedVectorType::get(Builder.getIntNTy(DL.getTypeAllocSizeInBits(
+ ResultVTy->getScalarType())),
+ ResultVTy->getNumElements());
Val = Builder.CreateBitCast(Val, VectorIntTy);
}
}
@@ -518,8 +518,8 @@ static Value *promoteAllocaUserToVector(
};
const auto CreateTempPtrIntCast = [&Builder, DL,
- CreateCastBetweenUnequalNumVecElems]
- (Value *Val, Type *PtrTy) -> Value * {
+ CreateCastBetweenUnequalNumVecElems](
+ Value *Val, Type *PtrTy) -> Value * {
assert(DL.getTypeStoreSize(Val->getType()) == DL.getTypeStoreSize(PtrTy));
const unsigned Size = DL.getTypeStoreSizeInBits(PtrTy);
if (!PtrTy->isVectorTy()) {
You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 62a25a4c7c3b291f0805894926419f1a64cd83f8 37bbba8572d21f2607d5692aa8e1d039c210ea49 llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll

The following files introduce new uses of undef:
- llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. You should use poison values for placeholders instead.

In tests, avoid using undef and having tests only work by chance. For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.
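For what it's worth, the new undef uses flagged here appear in the generated CHECK lines (e.g. the insertelement <8 x ptr addrspace(5)> undef in the tests above), i.e. they come from the pass's own output rather than from hand-written test input; the later insertelement chains in the same output already use the preferred form, along the lines of (names illustrative):

%v = insertelement <4 x ptr addrspace(5)> poison, ptr addrspace(5) %p, i64 0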
arsenm left a comment
Is this to solve the same issue as #119762? I think SROA has a shorter version of this in convertValue
define <2 x ptr addrspace(270)> @test_subvector_ptralloca_8_3to270(<2 x ptr addrspace(270)> %val) {
We don't care about 270, that's an x86 thing