diff --git a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
index 66fd466eccfe..e2f02dc64f76 100644
--- a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
+++ b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
@@ -26,15 +26,11 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
-// CHECK-128-NEXT: [[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1
 // CHECK-128-NEXT: [[SAVED_VALUE4:%.*]] = alloca , align 1
 // CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1
-// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load , ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]]
-// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 4)
-// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 4)
+// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]]
 // CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT: ret [[TMP2]]
@@ -56,15 +52,11 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) {
 //
 // CHECK-128-LABEL: @call_bool64_ff(
 // CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
-// CHECK-128-NEXT: [[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1
 // CHECK-128-NEXT: [[SAVED_VALUE4:%.*]] = alloca , align 1
 // CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1
-// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
-// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load , ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]]
-// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 2)
+// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 2)
 // CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]]
 // CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT: ret [[TMP2]]
@@ -90,13 +82,11 @@ fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, fixed_bool64_t op2) {
 //
 // CHECK-128-LABEL: @call_bool32_fs(
 // CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
 // CHECK-128-NEXT: [[SAVED_VALUE2:%.*]] = alloca , align 1
 // CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1
-// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
-// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[OP2:%.*]], i64 4)
-// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA9]]
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]], [[OP2:%.*]], i64 4)
+// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
 // CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT: ret [[TMP2]]
@@ -118,13 +108,11 @@ fixed_bool32_t call_bool32_fs(fixed_bool32_t op1, vbool32_t op2) {
 //
 // CHECK-128-LABEL: @call_bool64_fs(
 // CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
 // CHECK-128-NEXT: [[SAVED_VALUE2:%.*]] = alloca , align 1
 // CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1
-// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
-// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[OP2:%.*]], i64 2)
+// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]], [[OP2:%.*]], i64 2)
 // CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA11]]
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
 // CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT: ret [[TMP2]]
@@ -153,8 +141,8 @@ fixed_bool64_t call_bool64_fs(fixed_bool64_t op1, vbool64_t op2) {
 // CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1
 // CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1
 // CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 4)
-// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA9]]
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
 // CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT: ret [[TMP2]]
@@ -180,7 +168,7 @@ fixed_bool32_t call_bool32_ss(vbool32_t op1, vbool32_t op2) {
 // CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1
 // CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 2)
 // CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA11]]
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
+// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
 // CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1
 // CHECK-128-NEXT: ret [[TMP2]]
diff --git a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c
index 3785036380f5..f0fa7e8d07b4 100644
--- a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c
+++ b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c
@@ -80,9 +80,7 @@ fixed_bool32_t from_vbool32_t(vbool32_t type) {
 //
 // CHECK-128-LABEL: @to_vbool32_t(
 // CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
-// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT: ret [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]]
+// CHECK-128-NEXT: ret [[TYPE_COERCE:%.*]]
 //
 vbool32_t to_vbool32_t(fixed_bool32_t type) {
 return type;
@@ -118,9 +116,7 @@ fixed_bool64_t from_vbool64_t(vbool64_t type) {
 //
 // CHECK-128-LABEL: @to_vbool64_t(
 // CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
-// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT: ret [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]]
+// CHECK-128-NEXT: ret [[TYPE_COERCE:%.*]]
 //
 vbool64_t to_vbool64_t(fixed_bool64_t type) {
 return type;
diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c
index 8764616eef23..7992951346d5 100644
--- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c
+++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c
@@ -99,8 +99,8 @@ vbool4_t to_vbool4_t(fixed_bool4_t type) {
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1
 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1
-// CHECK-NEXT: store [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10:![0-9]+]]
+// CHECK-NEXT: store [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA8:![0-9]+]]
 // CHECK-NEXT: store <1 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 1
 // CHECK-NEXT: ret [[TMP1]]
@@ -111,9 +111,7 @@ fixed_bool32_t from_vbool32_t(vbool32_t type) {
 // CHECK-LABEL: @to_vbool32_t(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
-// CHECK-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
-// CHECK-NEXT: ret [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]]
+// CHECK-NEXT: ret [[TYPE_COERCE:%.*]]
 //
 vbool32_t to_vbool32_t(fixed_bool32_t type) {
 return type;
@@ -121,7 +119,7 @@ vbool32_t to_vbool32_t(fixed_bool32_t type) {
 // CHECK-LABEL: @to_vint32m1_t__from_gnu_int32m1_t(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA10]]
+// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA8]]
 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( poison, <8 x i32> [[TYPE]], i64 0)
 // CHECK-NEXT: ret [[CAST_SCALABLE]]
 //
@@ -132,7 +130,7 @@ vint32m1_t to_vint32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) {
 // CHECK-LABEL: @from_vint32m1_t__to_gnu_int32m1_t(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE:%.*]], i64 0)
-// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA10]]
+// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA8]]
 // CHECK-NEXT: ret void
 //
 gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) {
@@ -141,7 +139,7 @@ gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) {
 // CHECK-LABEL: @to_fixed_int32m1_t__from_gnu_int32m1_t(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA10]]
+// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA8]]
 // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( poison, <8 x i32> [[TYPE]], i64 0)
 // CHECK-NEXT: ret [[CAST_SCALABLE]]
 //
@@ -152,7 +150,7 @@ fixed_int32m1_t to_fixed_int32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) {
 // CHECK-LABEL: @from_fixed_int32m1_t__to_gnu_int32m1_t(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE_COERCE:%.*]], i64 0)
-// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA10]]
+// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA8]]
 // CHECK-NEXT: ret void
 //
 gnu_int32m1_t from_fixed_int32m1_t__to_gnu_int32m1_t(fixed_int32m1_t type) {
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index 610f3cfca41c..7612e553fe32 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -275,10 +275,6 @@ class Attribute {
 /// when unknown.
 std::optional getVScaleRangeMax() const;
- /// Return the value for vscale based on the vscale_range attribute or 0 when
- /// unknown.
- unsigned getVScaleValue() const;
-
 // Returns the unwind table kind.
 UWTableKind getUWTableKind() const;
diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
index 15370cb0c95c..71ab03aa1ae1 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -545,8 +545,9 @@ class VectorType : public Type {
 return VectorType::get(VTy->getElementType(), EltCnt * 2);
 }
- /// This static method returns a VectorType with the same size-in-bits as
- /// SizeTy but with an element type that matches the scalar type of EltTy.
+ /// This static method attempts to construct a VectorType with the same
+ /// size-in-bits as SizeTy but with an element type that matches the scalar
+ /// type of EltTy. The VectorType is returned on success, nullptr otherwise.
 static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) {
 if (SizeTy->getScalarType() == EltTy->getScalarType())
 return SizeTy;
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index e7afcbd31420..b1594b5f03d8 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -1030,6 +1030,10 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node {
 /// Return value: true => null pointer dereference is not undefined.
 bool nullPointerIsDefined() const;
+ /// Return the value for vscale based on the vscale_range attribute or 0 when
+ /// unknown.
+ unsigned getVScaleValue() const;
+
 private:
 void allocHungoffUselist();
 template void setHungoffOperand(Constant *C);
diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h
index 42a5ab4c58f9..59cc489ade40 100644
--- a/llvm/lib/IR/AttributeImpl.h
+++ b/llvm/lib/IR/AttributeImpl.h
@@ -343,7 +343,6 @@ class AttributeSetNode final
 const;
 unsigned getVScaleRangeMin() const;
 std::optional getVScaleRangeMax() const;
- unsigned getVScaleValue() const;
 UWTableKind getUWTableKind() const;
 AllocFnKind getAllocKind() const;
 MemoryEffects getMemoryEffects() const;
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index 7c3a554ba03f..ceb31856283c 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -469,14 +469,6 @@ std::optional Attribute::getVScaleRangeMax() const {
 return unpackVScaleRangeArgs(pImpl->getValueAsInt()).second;
 }
-unsigned Attribute::getVScaleValue() const {
- std::optional VScale = getVScaleRangeMax();
- if (VScale && *VScale == getVScaleRangeMin())
- return *VScale;
-
- return 0;
-}
-
 UWTableKind Attribute::getUWTableKind() const {
 assert(hasAttribute(Attribute::UWTable) && "Trying to get unwind table kind from non-uwtable attribute");
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 9c5dd5aeb92e..58f7b6f146a6 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1184,6 +1184,18 @@ bool Function::nullPointerIsDefined() const {
 return hasFnAttribute(Attribute::NullPointerIsValid);
 }
+unsigned Function::getVScaleValue() const {
+ Attribute Attr = getFnAttribute(Attribute::VScaleRange);
+ if (!Attr.isValid())
+ return 0;
+
+ unsigned VScale = Attr.getVScaleRangeMin();
+ if (VScale && VScale == Attr.getVScaleRangeMax())
+ return VScale;
+
+ return 0;
+}
+
 bool llvm::NullPointerIsDefined(const Function *F, unsigned AS) {
 if (F && F->nullPointerIsDefined())
 return true;
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index e88bf1ca4596..5c0f1fcefa52 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1119,8 +1119,7 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor {
 TypeSize Size = DL.getTypeStoreSize(LI.getType());
 if (Size.isScalable()) {
- Attribute Attr = LI.getFunction()->getFnAttribute(Attribute::VScaleRange);
- unsigned VScale = Attr.isValid() ? Attr.getVScaleValue() : 0;
+ unsigned VScale = LI.getFunction()->getVScaleValue();
 if (!VScale)
 return PI.setAborted(&LI);
@@ -1140,8 +1139,7 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor {
 TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
 if (StoreSize.isScalable()) {
- Attribute Attr = SI.getFunction()->getFnAttribute(Attribute::VScaleRange);
- unsigned VScale = Attr.isValid() ? Attr.getVScaleValue() : 0;
+ unsigned VScale = SI.getFunction()->getVScaleValue();
 if (!VScale)
 return PI.setAborted(&SI);
@@ -1955,18 +1953,29 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy,
 TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
 TypeSize OldSize = DL.getTypeSizeInBits(OldTy);
- if (isa(NewTy) && isa(OldTy)) {
- if (!VScale || NewTy->isPtrOrPtrVectorTy() || OldTy->isPtrOrPtrVectorTy() ||
- !VectorType::getWithSizeAndScalar(cast(NewTy), OldTy))
+ if ((isa(NewTy) && isa(OldTy)) ||
+ (isa(OldTy) && isa(NewTy))) {
+ // Conversion is only possible when the size of scalable vectors is known.
+ if (!VScale)
 return false;
- NewSize = TypeSize::getFixed(NewSize.getKnownMinValue() * VScale);
- } else if (isa(OldTy) && isa(NewTy)) {
- if (!VScale || NewTy->isPtrOrPtrVectorTy() || OldTy->isPtrOrPtrVectorTy() ||
- !VectorType::getWithSizeAndScalar(cast(OldTy), NewTy))
- return false;
+ // For ptr-to-int and int-to-ptr casts, the pointer side is resolved within
+ // a single domain (either fixed or scalable). Any additional conversion
+ // between fixed and scalable types is handled through integer types.
+ auto OldVTy = OldTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(OldTy) : OldTy;
+ auto NewVTy = NewTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(NewTy) : NewTy;
+
+ if (isa(NewTy)) {
+ if (!VectorType::getWithSizeAndScalar(cast(NewVTy), OldVTy))
+ return false;
- OldSize = TypeSize::getFixed(OldSize.getKnownMinValue() * VScale);
+ NewSize = TypeSize::getFixed(NewSize.getKnownMinValue() * VScale);
+ } else {
+ if (!VectorType::getWithSizeAndScalar(cast(OldVTy), NewVTy))
+ return false;
+
+ OldSize = TypeSize::getFixed(OldSize.getKnownMinValue() * VScale);
+ }
 }
 if (NewSize != OldSize)
@@ -2023,8 +2032,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
 #ifndef NDEBUG
 BasicBlock *BB = IRB.GetInsertBlock();
 assert(BB && BB->getParent() && "VScale unknown!");
- Attribute Attr = BB->getParent()->getFnAttribute(Attribute::VScaleRange);
- unsigned VScale = Attr.isValid() ? Attr.getVScaleValue() : 0;
+ unsigned VScale = BB->getParent()->getVScaleValue();
 assert(canConvertValue(DL, OldTy, NewTy, VScale) && "Value not convertable to type");
 #endif
@@ -2035,13 +2043,41 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
 assert(!(isa(OldTy) && isa(NewTy)) && "Integer types must be the exact same to convert.");
+ // A variant of bitcast that supports a mixture of fixed and scalable types
+ // that are known to have the same size.
+ auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {
+ Type *InTy = In->getType();
+ if (InTy == Ty)
+ return In;
+
+ if (isa(InTy) && isa(Ty)) {
+ // For vscale_range(2) expand <4 x i32> to -->
+ // <4 x i32> to to
+ auto *VTy = VectorType::getWithSizeAndScalar(cast(Ty), InTy);
+ return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,
+ PoisonValue::get(VTy), In,
+ IRB.getInt64(0)),
+ Ty);
+ }
+
+ if (isa(InTy) && isa(Ty)) {
+ // For vscale_range(2) expand to <4 x i32> -->
+ // to to <4 x i32>
+ auto *VTy = VectorType::getWithSizeAndScalar(cast(InTy), Ty);
+ return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),
+ IRB.getInt64(0));
+ }
+
+ return IRB.CreateBitCast(In, Ty);
+ };
+
 // See if we need inttoptr for this type pair. May require additional bitcast.
 if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
 // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
 // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
 // Expand <4 x i32> to <2 x i8*> --> <4 x i32> to <2 x i64> to <2 x i8*>
 // Directly handle i64 to i8*
- return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
+ return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),
 NewTy);
 }
@@ -2051,7 +2087,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
 // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
 // Expand <2 x i8*> to <4 x i32> --> <2 x i8*> to <2 x i64> to <4 x i32>
 // Expand i8* to i64 --> i8* to i64 to i64
- return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
+ return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
 NewTy);
 }
@@ -2066,24 +2102,14 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
 // size.
 if (OldAS != NewAS) {
 assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
- return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
- NewTy);
+ return IRB.CreateIntToPtr(
+ CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
+ DL.getIntPtrType(NewTy)),
+ NewTy);
 }
 }
- if (isa(NewTy) && isa(OldTy)) {
- auto *Ty = VectorType::getWithSizeAndScalar(cast(NewTy), OldTy);
- V = IRB.CreateInsertVector(Ty, PoisonValue::get(Ty), V, IRB.getInt64(0));
- return IRB.CreateBitCast(V, NewTy);
- }
-
- if (isa(NewTy) && isa(OldTy)) {
- auto *Ty = VectorType::getWithSizeAndScalar(cast(OldTy), NewTy);
- V = IRB.CreateBitCast(V, Ty);
- return IRB.CreateExtractVector(NewTy, V, IRB.getInt64(0));
- }
-
- return IRB.CreateBitCast(V, NewTy);
+ return CreateBitCastLike(V, NewTy);
 }
 /// Test whether the given slice use can be promoted to a vector.
@@ -4897,8 +4923,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
 Type *SliceTy = nullptr;
 VectorType *SliceVecTy = nullptr;
 const DataLayout &DL = AI.getDataLayout();
- Attribute Attr = AI.getFunction()->getFnAttribute(Attribute::VScaleRange);
- unsigned VScale = Attr.isValid() ? Attr.getVScaleValue() : 0;
+ unsigned VScale = AI.getFunction()->getVScaleValue();
 std::pair CommonUseTy =
 findCommonType(P.begin(), P.end(), P.endOffset());
diff --git a/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll
index b4df64a4e45c..85715e406e06 100644
--- a/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll
+++ b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll
@@ -67,11 +67,12 @@ define @cast_alloca_to_svint32_t( %type.coe
 define @cast_alloca_from_svint32_t() vscale_range(1) {
 ; CHECK-LABEL: @cast_alloca_from_svint32_t(
 ; CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16
-; CHECK-NEXT: store <16 x i32> undef, ptr [[RETVAL_COERCE]], align 16
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr [[RETVAL_COERCE]], align 16
 ; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 16
 ; CHECK-NEXT: ret [[TMP1]]
 ;
 %retval = alloca <16 x i32>
+ store <16 x i32> zeroinitializer, ptr %retval
 %retval.coerce = alloca
 call void @llvm.memcpy.p0.p0.i64(ptr align 16 %retval.coerce, ptr align 16 %retval, i64 64, i1 false)
 %1 = load , ptr %retval.coerce
@@ -133,6 +134,56 @@ define <2 x i8> @fixed_alloca_fixed_from_scalable_requires_bitcast(
 ret %cast
 }
+define <2 x ptr> @fixed_alloca_fixed_from_scalable_inttoptr( %a) vscale_range(1) {
+; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_inttoptr(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast [[A:%.*]] to
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64( [[TMP1]], i64 0)
+; CHECK-NEXT: [[TMP2:%.*]] = inttoptr <2 x i64> [[TMP3]] to <2 x ptr>
+; CHECK-NEXT: ret <2 x ptr> [[TMP2]]
+;
+ %tmp = alloca <4 x i32>
+ store %a, ptr %tmp
+ %cast = load <2 x ptr>, ptr %tmp
+ ret <2 x ptr> %cast
+}
+
+define <4 x i32> @fixed_alloca_fixed_from_scalable_ptrtoint( %a) vscale_range(1) {
+; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_ptrtoint(
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint [[A:%.*]] to
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast [[TMP1]] to
+; CHECK-NEXT: [[TMP_0_CAST:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32( [[TMP2]], i64 0)
+; CHECK-NEXT: ret <4 x i32> [[TMP_0_CAST]]
+;
+ %tmp = alloca <4 x i32>
+ store %a, ptr %tmp
+ %cast = load <4 x i32>, ptr %tmp
+ ret <4 x i32> %cast
+}
+
+define <2 x ptr> @fixed_alloca_fixed_from_scalable_ptrtoptr( %a) vscale_range(1) {
+; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_ptrtoptr(
+; CHECK-NEXT: [[TMP_0_CAST:%.*]] = call <2 x ptr> @llvm.vector.extract.v2p0.nxv2p0( [[A:%.*]], i64 0)
+; CHECK-NEXT: ret <2 x ptr> [[TMP_0_CAST]]
+;
+ %tmp = alloca <2 x ptr>
+ store %a, ptr %tmp
+ %cast = load <2 x ptr>, ptr %tmp
+ ret <2 x ptr> %cast
+}
+
+define <2 x ptr> @fixed_alloca_fixed_from_scalable_ptrtoptr_different_addrspace( %a) vscale_range(1) {
+; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_ptrtoptr_different_addrspace(
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint [[A:%.*]] to
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64( [[TMP1]], i64 0)
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x ptr>
+; CHECK-NEXT: ret <2 x ptr> [[TMP3]]
+;
+ %tmp = alloca <2 x ptr>
+ store %a, ptr %tmp
+ %cast = load <2 x ptr>, ptr %tmp
+ ret <2 x ptr> %cast
+}
+
 define @fixed_alloca_scalable_from_fixed(<4 x i32> %a) vscale_range(1) {
 ; CHECK-LABEL: @fixed_alloca_scalable_from_fixed(
 ; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.vector.insert.nxv4i32.v4i32( poison, <4 x i32> [[A:%.*]], i64 0)
@@ -156,6 +207,56 @@ define @fixed_alloca_scalable_from_fixed_requires_bitcast(<2
 ret %cast
 }
+define @fixed_alloca_scalable_from_fixed_inttoptr(<4 x i32> %a) vscale_range(1) {
+; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_inttoptr(
+; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.vector.insert.nxv4i32.v4i32( poison, <4 x i32> [[A:%.*]], i64 0)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast [[TMP1]] to
+; CHECK-NEXT: [[TMP_0_CAST:%.*]] = inttoptr [[TMP2]] to
+; CHECK-NEXT: ret [[TMP_0_CAST]]
+;
+ %tmp = alloca <4 x i32>
+ store <4 x i32> %a, ptr %tmp
+ %cast = load , ptr %tmp
+ ret %cast
+}
+
+define @fixed_alloca_scalable_from_fixed_ptrtoint(<2 x ptr> %a) vscale_range(1) {
+; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_ptrtoint(
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <2 x ptr> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.vector.insert.nxv2i64.v2i64( poison, <2 x i64> [[TMP1]], i64 0)
+; CHECK-NEXT: [[TMP_0_CAST:%.*]] = bitcast [[TMP2]] to
+; CHECK-NEXT: ret [[TMP_0_CAST]]
+;
+ %tmp = alloca <4 x i32>
+ store <2 x ptr> %a, ptr %tmp
+ %cast = load , ptr %tmp
+ ret %cast
+}
+
+define @fixed_alloca_scalable_from_fixed_ptrtoptr(<2 x ptr> %a) vscale_range(1) {
+; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_ptrtoptr(
+; CHECK-NEXT: [[TMP_0_CAST:%.*]] = call @llvm.vector.insert.nxv2p0.v2p0( poison, <2 x ptr> [[A:%.*]], i64 0)
+; CHECK-NEXT: ret [[TMP_0_CAST]]
+;
+ %tmp = alloca <2 x ptr>
+ store <2 x ptr> %a, ptr %tmp
+ %cast = load , ptr %tmp
+ ret %cast
+}
+
+define @fixed_alloca_scalable_from_fixed_ptrtoptr_different_addrspace(<2 x ptr> %a) vscale_range(1) {
+; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_ptrtoptr_different_addrspace(
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <2 x ptr> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.vector.insert.nxv2i64.v2i64( poison, <2 x i64> [[TMP1]], i64 0)
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr [[TMP2]] to
+; CHECK-NEXT: ret [[TMP3]]
+;
+ %tmp = alloca <2 x ptr>
+ store <2 x ptr> %a, ptr %tmp
+ %cast = load , ptr %tmp
+ ret %cast
+}
+
 define <4 x i32> @scalable_alloca_fixed_from_scalable( %a) vscale_range(1) {
 ; CHECK-LABEL: @scalable_alloca_fixed_from_scalable(
 ; CHECK-NEXT: [[TMP:%.*]] = alloca , align 16
diff --git a/llvm/test/Transforms/SROA/scalable-vectors.ll b/llvm/test/Transforms/SROA/scalable-vectors.ll
index 9d6dec34b35b..346814d9f630 100644
--- a/llvm/test/Transforms/SROA/scalable-vectors.ll
+++ b/llvm/test/Transforms/SROA/scalable-vectors.ll
@@ -69,11 +69,12 @@ define @cast_alloca_to_svint32_t( %type.coe
 define @cast_alloca_from_svint32_t() {
 ; CHECK-LABEL: @cast_alloca_from_svint32_t(
 ; CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16
-; CHECK-NEXT: store <16 x i32> undef, ptr [[RETVAL_COERCE]], align 16
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr [[RETVAL_COERCE]], align 16
 ; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 16
 ; CHECK-NEXT: ret [[TMP1]]
 ;
 %retval = alloca <16 x i32>
+ store <16 x i32> zeroinitializer, ptr %retval
 %retval.coerce = alloca
 call void @llvm.memcpy.p0.p0.i64(ptr align 16 %retval.coerce, ptr align 16 %retval, i64 64, i1 false)
 %1 = load , ptr %retval.coerce
@@ -138,6 +139,45 @@ define <2 x i8> @fixed_alloca_fixed_from_scalable_requires_bitcast(
 ret %cast
 }
+define <2 x ptr> @fixed_alloca_fixed_from_scalable_inttoptr( %a) {
+; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_inttoptr(
+; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i32>, align 16
+; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x ptr>, ptr [[TMP]], align 16
+; CHECK-NEXT: ret <2 x ptr> [[TMP2]]
+;
+ %tmp = alloca <4 x i32>
+ store %a, ptr %tmp
+ %cast = load <2 x ptr>, ptr %tmp
+ ret <2 x ptr> %cast
+}
+
+define <4 x i32> @fixed_alloca_fixed_from_scalable_ptrtoint( %a) {
+; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_ptrtoint(
+; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i32>, align 16
+; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 16
+; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load <4 x i32>, ptr [[TMP]], align 16
+; CHECK-NEXT: ret <4 x i32> [[TMP_0_CAST]]
+;
+ %tmp = alloca <4 x i32>
+ store %a, ptr %tmp
+ %cast = load <4 x i32>, ptr %tmp
+ ret <4 x i32> %cast
+}
+
+define <2 x ptr> @fixed_alloca_fixed_from_scalable_ptrtoptr( %a) {
+; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_ptrtoptr(
+; CHECK-NEXT: [[TMP:%.*]] = alloca <2 x ptr>, align 16
+; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 16
+; CHECK-NEXT: [[CAST:%.*]] = load <2 x ptr>, ptr [[TMP]], align 16
+; CHECK-NEXT: ret <2 x ptr> [[CAST]]
+;
+ %tmp = alloca <2 x ptr>
+ store %a, ptr %tmp
+ %cast = load <2 x ptr>, ptr %tmp
+ ret <2 x ptr> %cast
+}
+
 define @fixed_alloca_scalable_from_fixed(<4 x i32> %a) {
 ; CHECK-LABEL: @fixed_alloca_scalable_from_fixed(
 ; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i32>, align 16
@@ -164,6 +204,45 @@ define @fixed_alloca_scalable_from_fixed_requires_bitcast(<2
 ret %cast
 }
+define @fixed_alloca_scalable_from_fixed_inttoptr(<4 x i32> %a) {
+; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_inttoptr(
+; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i32>, align 16
+; CHECK-NEXT: store <4 x i32> [[A:%.*]], ptr [[TMP]], align 16
+; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load , ptr [[TMP]], align 16
+; CHECK-NEXT: ret [[TMP_0_CAST]]
+;
+ %tmp = alloca <4 x i32>
+ store <4 x i32> %a, ptr %tmp
+ %cast = load , ptr %tmp
+ ret %cast
+}
+
+define @fixed_alloca_scalable_from_fixed_ptrtoint(<2 x ptr> %a) {
+; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_ptrtoint(
+; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i32>, align 16
+; CHECK-NEXT: store <2 x ptr> [[A:%.*]], ptr [[TMP]], align 16
+; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load , ptr [[TMP]], align 16
+; CHECK-NEXT: ret [[TMP_0_CAST]]
+;
+ %tmp = alloca <4 x i32>
+ store <2 x ptr> %a, ptr %tmp
+ %cast = load , ptr %tmp
+ ret %cast
+}
+
+define @fixed_alloca_scalable_from_fixed_ptrtoptr(<2 x ptr> %a) {
+; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_ptrtoptr(
+; CHECK-NEXT: [[TMP:%.*]] = alloca <2 x ptr>, align 16
+; CHECK-NEXT: store <2 x ptr> [[A:%.*]], ptr [[TMP]], align 16
+; CHECK-NEXT: [[CAST:%.*]] = load , ptr [[TMP]], align 16
+; CHECK-NEXT: ret [[CAST]]
+;
+ %tmp = alloca <2 x ptr>
+ store <2 x ptr> %a, ptr %tmp
+ %cast = load , ptr %tmp
+ ret %cast
+}
+
 define <4 x i32> @scalable_alloca_fixed_from_scalable( %a) {
 ; CHECK-LABEL: @scalable_alloca_fixed_from_scalable(
 ; CHECK-NEXT: [[TMP:%.*]] = alloca , align 16