Skip to content

Conversation

@jofrn
Copy link
Contributor

@jofrn jofrn commented Jan 27, 2025

There may be casts between values of type <n x ptr0> and <m x ptr1> where m != n, that is, vectors with a different number of elements even though the total sizes match up. This is not valid unless appropriate bitcasts are inserted in between. For example, to cast between
%in = <4 x ptr addrspace(5)> and %out = <2 x ptr addrspace(1)>, one must do:

%int = ptrtoint <4 x ptr addrspace(5)> %in to <4 x i32>
%i128 = bitcast <4 x i32> %int to i128
%ivec = bitcast i128 %i128 to <2 x i64>
%out = inttoptr <2 x i64> %ivec to <2 x ptr addrspace(1)>

There may be casts between values of type <n x ptr0> and <m x ptr1>
where m != n, that is, vectors with a different number of elements even
though the sizes match up. This is not valid unless appropriate bitcasts are
inserted in between. For example, to cast between
%in = <4 x ptr addrspace(5)> and %out = <2 x ptr addrspace(1)>, one must do:

%int = ptrtoint <4 x ptr addrspace(5)> %in to <4 x i32>
%i128 = bitcast <4 x i32> %int to i128
%ivec = bitcast i128 %i128 to <2 x i64>
%out = inttoptr <2 x i64> %ivec to <2 x ptr addrspace(1)>
@llvmbot
Copy link
Member

llvmbot commented Jan 27, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: None (jofrn)

Changes

There may be casts between values of type <n x ptr0> and <m x ptr1> where m != n, that is, vectors with a different number of elements even though the sizes match up. This is not valid unless appropriate bitcasts are inserted in between. For example, to cast between
%in = <4 x ptr addrspace(5)> and %out = <2 x ptr addrspace(1)>, one must do:

%int = ptrtoint <4 x ptr addrspace(5)> %in to <4 x i32>
%i128 = bitcast <4 x i32> %int to i128
%ivec = bitcast i128 %i128 to <2 x i64>
%out = inttoptr <2 x i64> %ivec to <2 x ptr addrspace(1)>


Full diff: https://github.com/llvm/llvm-project/pull/124547.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (+56-7)
  • (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll (+166)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index e27ef71c1c0883..0082e8f4856fea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -470,19 +470,68 @@ static Value *promoteAllocaUserToVector(
     return Dummy;
   };
 
-  const auto CreateTempPtrIntCast = [&Builder, DL](Value *Val,
-                                                   Type *PtrTy) -> Value * {
+  const auto CreateCastBetweenUnequalNumVecElems = [&Builder, DL, Inst]
+      (Value *Val, Type *ResultTy) -> Value * {
+    // Can already cast between vectors of integers.
+    if (isa<IntegerType>(Val->getType()->getScalarType()) &&
+        isa<IntegerType>(ResultTy->getScalarType()))
+      return Builder.CreateBitOrPointerCast(Val, ResultTy);
+
+    // Insert casts between vectors/scalars of an unequal number of elements.
+    FixedVectorType *ValVTy = dyn_cast<FixedVectorType>(Val->getType());
+    FixedVectorType *ResultVTy = dyn_cast<FixedVectorType>(ResultTy);
+    if (isa<PointerType>(Val->getType()->getScalarType())) {
+      Type *IntTy;
+      if (ValVTy) {
+        Type *IntElemTy = Builder.getIntNTy(
+            DL.getTypeAllocSizeInBits(ValVTy->getScalarType()));
+        IntTy = FixedVectorType::get(IntElemTy, ValVTy->getNumElements());
+      } else
+        IntTy = IntegerType::get(Inst->getParent()->getParent()->getContext(),
+                                 DL.getTypeAllocSizeInBits(Val->getType()));
+      // Insert ptrtoint if casting to <m x ptr> or if Val is a ptr.
+      const bool IsToScalar = !ResultVTy;
+      const bool IsToVector = ResultVTy->getNumElements() !=
+          ValVTy->getNumElements();
+      if (IsToScalar || IsToVector)
+        Val = Builder.CreatePtrToInt(Val, IntTy);
+    }
+
+    const bool IsScalarToVector = ResultVTy && !ValVTy;
+    const bool IsVectorToVector = ResultVTy &&
+        ValVTy->getNumElements() != ResultVTy->getNumElements();
+    if (IsScalarToVector || IsVectorToVector) {
+      Type *IntTy = Builder.getIntNTy(
+          DL.getTypeAllocSizeInBits(Val->getType()));
+      // Insert bitcast to cast from integer, iM, to vector, <m x iN>.
+      Val = Builder.CreateBitCast(Val, IntTy);
+      // If result is a ptr, insert bitcast from <m x iN> to <n x ptr>.
+      if (isa<PointerType>(ResultVTy->getScalarType())) {
+        FixedVectorType *VectorIntTy =
+            FixedVectorType::get(Builder.getIntNTy(
+                DL.getTypeAllocSizeInBits(ResultVTy->getScalarType())),
+                ResultVTy->getNumElements());
+        Val = Builder.CreateBitCast(Val, VectorIntTy);
+      }
+    }
+    return Builder.CreateBitOrPointerCast(Val, ResultTy);
+  };
+
+  const auto CreateTempPtrIntCast = [&Builder, DL,
+                                     CreateCastBetweenUnequalNumVecElems]
+                                    (Value *Val, Type *PtrTy) -> Value * {
     assert(DL.getTypeStoreSize(Val->getType()) == DL.getTypeStoreSize(PtrTy));
     const unsigned Size = DL.getTypeStoreSizeInBits(PtrTy);
-    if (!PtrTy->isVectorTy())
-      return Builder.CreateBitOrPointerCast(Val, Builder.getIntNTy(Size));
+    if (!PtrTy->isVectorTy()) {
+      return CreateCastBetweenUnequalNumVecElems(Val, Builder.getIntNTy(Size));
+    }
     const unsigned NumPtrElts = cast<FixedVectorType>(PtrTy)->getNumElements();
     // If we want to cast to cast, e.g. a <2 x ptr> into a <4 x i32>, we need to
     // first cast the ptr vector to <2 x i64>.
     assert((Size % NumPtrElts == 0) && "Vector size not divisble");
     Type *EltTy = Builder.getIntNTy(Size / NumPtrElts);
-    return Builder.CreateBitOrPointerCast(
-        Val, FixedVectorType::get(EltTy, NumPtrElts));
+    FixedVectorType *ResultVTy = FixedVectorType::get(EltTy, NumPtrElts);
+    return CreateCastBetweenUnequalNumVecElems(Val, ResultVTy);
   };
 
   Type *VecEltTy = VectorTy->getElementType();
@@ -564,7 +613,7 @@ static Value *promoteAllocaUserToVector(
           Val = CreateTempPtrIntCast(Val, AccessTy);
         else if (VectorTy->isPtrOrPtrVectorTy())
           Val = CreateTempPtrIntCast(Val, VectorTy);
-        return Builder.CreateBitOrPointerCast(Val, VectorTy);
+        return CreateCastBetweenUnequalNumVecElems(Val, VectorTy);
       }
     }
 
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll
index 7c5410004ed5b7..f93c6db3c2712b 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll
@@ -327,6 +327,172 @@ entry:
   ret void
 }
 
+define <2 x ptr addrspace(1)> @test_subvector_ptralloca_8(<2 x ptr addrspace(1)> %val) {
+; CHECK-LABEL: define <2 x ptr addrspace(1)> @test_subvector_ptralloca_8
+; CHECK-SAME: (<2 x ptr addrspace(1)> [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(1)> [[VAL]] to <2 x i64>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(5)>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x ptr addrspace(5)> undef, ptr addrspace(5) [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP3]], i64 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP5]], ptr addrspace(5) [[TMP6]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP3]], i64 2
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP7]], ptr addrspace(5) [[TMP8]], i32 2
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP3]], i64 3
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP9]], ptr addrspace(5) [[TMP10]], i32 3
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x ptr addrspace(5)> poison, ptr addrspace(5) [[TMP4]], i64 0
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP12]], ptr addrspace(5) [[TMP6]], i64 1
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP13]], ptr addrspace(5) [[TMP8]], i64 2
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP14]], ptr addrspace(5) [[TMP10]], i64 3
+; CHECK-NEXT:    [[TMP16:%.*]] = ptrtoint <4 x ptr addrspace(5)> [[TMP15]] to <4 x i32>
+; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <4 x i32> [[TMP16]] to i128
+; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i128 [[TMP17]] to <2 x i64>
+; CHECK-NEXT:    [[TMP19:%.*]] = inttoptr <2 x i64> [[TMP18]] to <2 x ptr addrspace(1)>
+; CHECK-NEXT:    ret <2 x ptr addrspace(1)> [[TMP19]]
+;
+entry:
+  %stack = alloca [8 x ptr addrspace(5)], align 4, addrspace(5)
+  store <2 x ptr addrspace(1)> %val, ptr addrspace(5) %stack
+  %L = load <2 x ptr addrspace(1)>, ptr addrspace(5) %stack, align 16
+  ret <2 x ptr addrspace(1)> %L
+}
+
+define <2 x ptr addrspace(1)> @test_subvector_ptralloca_4(<2 x ptr addrspace(1)> %val) {
+; CHECK-LABEL: define <2 x ptr addrspace(1)> @test_subvector_ptralloca_4
+; CHECK-SAME: (<2 x ptr addrspace(1)> [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(1)> [[VAL]] to <2 x i64>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(5)>
+; CHECK-NEXT:    ret <2 x ptr addrspace(1)> [[VAL]]
+;
+entry:
+  %stack = alloca [4 x ptr addrspace(5)], align 4, addrspace(5)
+  store <2 x ptr addrspace(1)> %val, ptr addrspace(5) %stack
+  %L = load <2 x ptr addrspace(1)>, ptr addrspace(5) %stack, align 16
+  ret <2 x ptr addrspace(1)> %L
+}
+
+define <2 x ptr addrspace(1)> @test_vector_ptralloca_2_3to1(<2 x ptr addrspace(1)> %val) {
+; CHECK-LABEL: define <2 x ptr addrspace(1)> @test_vector_ptralloca_2_3to1
+; CHECK-SAME: (<2 x ptr addrspace(1)> [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(1)> [[VAL]] to <2 x i64>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(3)>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x ptr addrspace(3)> [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x ptr addrspace(3)> undef, ptr addrspace(3) [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x ptr addrspace(3)> [[TMP3]], i64 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x ptr addrspace(3)> [[TMP5]], ptr addrspace(3) [[TMP6]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x ptr addrspace(3)> poison, ptr addrspace(3) [[TMP4]], i64 0
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x ptr addrspace(3)> [[TMP8]], ptr addrspace(3) [[TMP6]], i64 1
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint <4 x ptr addrspace(3)> [[TMP9]] to <4 x i32>
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to i128
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i128 [[TMP11]] to <2 x i64>
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr <2 x i64> [[TMP12]] to <2 x ptr addrspace(1)>
+; CHECK-NEXT:    ret <2 x ptr addrspace(1)> [[TMP13]]
+;
+entry:
+  %stack = alloca [2 x ptr addrspace(3)], align 4, addrspace(3)
+  store <2 x ptr addrspace(1)> %val, ptr addrspace(3) %stack
+  %L = load <2 x ptr addrspace(1)>, ptr addrspace(3) %stack, align 16
+  ret <2 x ptr addrspace(1)> %L
+}
+
+define <2 x ptr addrspace(5)> @test_subvector_ptralloca_2_1to5(<2 x ptr addrspace(5)> %val) {
+; CHECK-LABEL: define <2 x ptr addrspace(5)> @test_subvector_ptralloca_2_1to5
+; CHECK-SAME: (<2 x ptr addrspace(5)> [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(5)> [[VAL]] to <2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr <1 x i64> [[TMP2]] to <1 x ptr addrspace(1)>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <1 x ptr addrspace(1)> [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x ptr addrspace(1)> undef, ptr addrspace(1) [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <1 x ptr addrspace(1)> poison, ptr addrspace(1) [[TMP4]], i64 0
+; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint <1 x ptr addrspace(1)> [[TMP6]] to <1 x i64>
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i64 [[TMP8]] to <2 x i32>
+; CHECK-NEXT:    [[TMP10:%.*]] = inttoptr <2 x i32> [[TMP9]] to <2 x ptr addrspace(5)>
+; CHECK-NEXT:    ret <2 x ptr addrspace(5)> [[TMP10]]
+;
+entry:
+  %stack = alloca [8 x ptr addrspace(1)], align 4, addrspace(1)
+  store <2 x ptr addrspace(5)> %val, ptr addrspace(1) %stack
+  %L = load <2 x ptr addrspace(5)>, ptr addrspace(1) %stack, align 16
+  ret <2 x ptr addrspace(5)> %L
+}
+
+define <2 x ptr addrspace(270)> @test_subvector_ptralloca_8_3to270(<2 x ptr addrspace(270)> %val) {
+; CHECK-LABEL: define <2 x ptr addrspace(270)> @test_subvector_ptralloca_8_3to270
+; CHECK-SAME: (<2 x ptr addrspace(270)> [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(270)> [[VAL]] to <2 x i64>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(3)>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x ptr addrspace(3)> [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x ptr addrspace(3)> undef, ptr addrspace(3) [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x ptr addrspace(3)> [[TMP3]], i64 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x ptr addrspace(3)> [[TMP5]], ptr addrspace(3) [[TMP6]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x ptr addrspace(3)> [[TMP3]], i64 2
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x ptr addrspace(3)> [[TMP7]], ptr addrspace(3) [[TMP8]], i32 2
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x ptr addrspace(3)> [[TMP3]], i64 3
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x ptr addrspace(3)> [[TMP9]], ptr addrspace(3) [[TMP10]], i32 3
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x ptr addrspace(3)> poison, ptr addrspace(3) [[TMP4]], i64 0
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x ptr addrspace(3)> [[TMP12]], ptr addrspace(3) [[TMP6]], i64 1
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x ptr addrspace(3)> [[TMP13]], ptr addrspace(3) [[TMP8]], i64 2
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x ptr addrspace(3)> [[TMP14]], ptr addrspace(3) [[TMP10]], i64 3
+; CHECK-NEXT:    [[TMP16:%.*]] = ptrtoint <4 x ptr addrspace(3)> [[TMP15]] to <4 x i32>
+; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <4 x i32> [[TMP16]] to i128
+; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i128 [[TMP17]] to <2 x i64>
+; CHECK-NEXT:    [[TMP19:%.*]] = inttoptr <2 x i64> [[TMP18]] to <2 x ptr addrspace(270)>
+; CHECK-NEXT:    ret <2 x ptr addrspace(270)> [[TMP19]]
+;
+entry:
+  %stack = alloca [8 x ptr addrspace(3)], align 4, addrspace(3)
+  store <2 x ptr addrspace(270)> %val, ptr addrspace(3) %stack
+  %L = load <2 x ptr addrspace(270)>, ptr addrspace(3) %stack, align 16
+  ret <2 x ptr addrspace(270)> %L
+}
+
+define ptr @test_subvector_ptralloca_2_scalar(ptr %val) {
+; CHECK-LABEL: define ptr @test_subvector_ptralloca_2_scalar
+; CHECK-SAME: (ptr [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[VAL]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr <2 x i32> [[TMP1]] to <2 x ptr addrspace(3)>
+; CHECK-NEXT:    ret ptr [[VAL]]
+;
+entry:
+  %stack = alloca <2 x ptr addrspace(3)>, align 8, addrspace(3)
+  store ptr %val, ptr addrspace(3) %stack
+  %L = load ptr, ptr addrspace(3) %stack, align 8
+  ret ptr %L
+}
+
+define ptr @test_subvector_ptralloca_1_scalar(ptr %val) {
+; CHECK-LABEL: define ptr @test_subvector_ptralloca_1_scalar
+; CHECK-SAME: (ptr [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[STACK:%.*]] = alloca <1 x ptr addrspace(3)>, align 8, addrspace(3)
+; CHECK-NEXT:    store ptr [[VAL]], ptr addrspace(3) [[STACK]], align 8
+; CHECK-NEXT:    [[L:%.*]] = load ptr, ptr addrspace(3) [[STACK]], align 8
+; CHECK-NEXT:    ret ptr [[L]]
+;
+entry:
+  %stack = alloca <1 x ptr addrspace(3)>, align 8, addrspace(3)
+  store ptr %val, ptr addrspace(3) %stack
+  %L = load ptr, ptr addrspace(3) %stack, align 8
+  ret ptr %L
+}
+
 define void @test_out_of_bounds_subvec(<2 x i64> %val) {
 ; CHECK-LABEL: define void @test_out_of_bounds_subvec
 ; CHECK-SAME: (<2 x i64> [[VAL:%.*]]) {

@github-actions
Copy link

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 62a25a4c7c3b291f0805894926419f1a64cd83f8 37bbba8572d21f2607d5692aa8e1d039c210ea49 --extensions cpp -- llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 0082e8f485..4c524901e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -470,8 +470,8 @@ static Value *promoteAllocaUserToVector(
     return Dummy;
   };
 
-  const auto CreateCastBetweenUnequalNumVecElems = [&Builder, DL, Inst]
-      (Value *Val, Type *ResultTy) -> Value * {
+  const auto CreateCastBetweenUnequalNumVecElems =
+      [&Builder, DL, Inst](Value *Val, Type *ResultTy) -> Value * {
     // Can already cast between vectors of integers.
     if (isa<IntegerType>(Val->getType()->getScalarType()) &&
         isa<IntegerType>(ResultTy->getScalarType()))
@@ -491,26 +491,26 @@ static Value *promoteAllocaUserToVector(
                                  DL.getTypeAllocSizeInBits(Val->getType()));
       // Insert ptrtoint if casting to <m x ptr> or if Val is a ptr.
       const bool IsToScalar = !ResultVTy;
-      const bool IsToVector = ResultVTy->getNumElements() !=
-          ValVTy->getNumElements();
+      const bool IsToVector =
+          ResultVTy->getNumElements() != ValVTy->getNumElements();
       if (IsToScalar || IsToVector)
         Val = Builder.CreatePtrToInt(Val, IntTy);
     }
 
     const bool IsScalarToVector = ResultVTy && !ValVTy;
-    const bool IsVectorToVector = ResultVTy &&
-        ValVTy->getNumElements() != ResultVTy->getNumElements();
+    const bool IsVectorToVector =
+        ResultVTy && ValVTy->getNumElements() != ResultVTy->getNumElements();
     if (IsScalarToVector || IsVectorToVector) {
-      Type *IntTy = Builder.getIntNTy(
-          DL.getTypeAllocSizeInBits(Val->getType()));
+      Type *IntTy =
+          Builder.getIntNTy(DL.getTypeAllocSizeInBits(Val->getType()));
       // Insert bitcast to cast from integer, iM, to vector, <m x iN>.
       Val = Builder.CreateBitCast(Val, IntTy);
       // If result is a ptr, insert bitcast from <m x iN> to <n x ptr>.
       if (isa<PointerType>(ResultVTy->getScalarType())) {
         FixedVectorType *VectorIntTy =
-            FixedVectorType::get(Builder.getIntNTy(
-                DL.getTypeAllocSizeInBits(ResultVTy->getScalarType())),
-                ResultVTy->getNumElements());
+            FixedVectorType::get(Builder.getIntNTy(DL.getTypeAllocSizeInBits(
+                                     ResultVTy->getScalarType())),
+                                 ResultVTy->getNumElements());
         Val = Builder.CreateBitCast(Val, VectorIntTy);
       }
     }
@@ -518,8 +518,8 @@ static Value *promoteAllocaUserToVector(
   };
 
   const auto CreateTempPtrIntCast = [&Builder, DL,
-                                     CreateCastBetweenUnequalNumVecElems]
-                                    (Value *Val, Type *PtrTy) -> Value * {
+                                     CreateCastBetweenUnequalNumVecElems](
+                                        Value *Val, Type *PtrTy) -> Value * {
     assert(DL.getTypeStoreSize(Val->getType()) == DL.getTypeStoreSize(PtrTy));
     const unsigned Size = DL.getTypeStoreSizeInBits(PtrTy);
     if (!PtrTy->isVectorTy()) {

@github-actions
Copy link

⚠️ undef deprecator found issues in your code. ⚠️

You can test this locally with the following command:
git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 62a25a4c7c3b291f0805894926419f1a64cd83f8 37bbba8572d21f2607d5692aa8e1d039c210ea49 llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll

The following files introduce new uses of undef:

  • llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. You should use poison values for placeholders instead.

In tests, avoid using undef and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.

Copy link
Contributor

@arsenm arsenm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this to solve the same issue as #119762? I think SROA has a shorter version of this in convertValue.

ret <2 x ptr addrspace(5)> %L
}

define <2 x ptr addrspace(270)> @test_subvector_ptralloca_8_3to270(<2 x ptr addrspace(270)> %val) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't care about address space 270; that's an x86 thing.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants