diff --git a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c index e2f02dc64f76..66fd466eccfe 100644 --- a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c +++ b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c @@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_ // // CHECK-128-LABEL: @call_bool32_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 +// CHECK-128-NEXT: [[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1 // CHECK-128-NEXT: [[SAVED_VALUE4:%.*]] = alloca , align 1 // CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 -// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 4) -// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]] -// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]] +// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]] +// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load , ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 4) +// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA9:![0-9]+]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT: ret [[TMP2]] @@ -52,11 +56,15 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) { // // CHECK-128-LABEL: @call_bool64_ff( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 +// CHECK-128-NEXT: [[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1 // CHECK-128-NEXT: [[SAVED_VALUE4:%.*]] = alloca , align 1 // CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 -// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 2) +// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load , ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 2) // CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]] -// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT: ret [[TMP2]] @@ -82,11 +90,13 @@ fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, fixed_bool64_t op2) { // // CHECK-128-LABEL: 
@call_bool32_fs( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 // CHECK-128-NEXT: [[SAVED_VALUE2:%.*]] = alloca , align 1 // CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 -// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]], [[OP2:%.*]], i64 4) -// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]] -// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[OP2:%.*]], i64 4) +// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA9]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT: ret [[TMP2]] @@ -108,11 +118,13 @@ fixed_bool32_t call_bool32_fs(fixed_bool32_t op1, vbool32_t op2) { // // CHECK-128-LABEL: @call_bool64_fs( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 // CHECK-128-NEXT: [[SAVED_VALUE2:%.*]] = alloca , align 1 // CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 -// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]], [[OP2:%.*]], i64 2) +// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], [[OP2:%.*]], i64 2) // CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA11]] -// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT: ret [[TMP2]] @@ -141,8 +153,8 @@ fixed_bool64_t call_bool64_fs(fixed_bool64_t op1, vbool64_t op2) { // CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 // CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 // CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 4) -// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] -// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA9]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT: ret [[TMP2]] @@ -168,7 +180,7 @@ fixed_bool32_t call_bool32_ss(vbool32_t op1, vbool32_t op2) { // CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 // CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 2) // CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE]], align 1, 
!tbaa [[TBAA11]] -// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT: ret [[TMP2]] diff --git a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c index f0fa7e8d07b4..3785036380f5 100644 --- a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c +++ b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c @@ -80,7 +80,9 @@ fixed_bool32_t from_vbool32_t(vbool32_t type) { // // CHECK-128-LABEL: @to_vbool32_t( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: ret [[TYPE_COERCE:%.*]] +// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 +// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT: ret [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]] // vbool32_t to_vbool32_t(fixed_bool32_t type) { return type; @@ -116,7 +118,9 @@ fixed_bool64_t from_vbool64_t(vbool64_t type) { // // CHECK-128-LABEL: @to_vbool64_t( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: ret [[TYPE_COERCE:%.*]] +// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 +// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT: ret [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]] // vbool64_t to_vbool64_t(fixed_bool64_t type) { return type; diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c index 7992951346d5..8764616eef23 100644 --- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c +++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c @@ -99,8 +99,8 @@ vbool4_t to_vbool4_t(fixed_bool4_t type) { // CHECK-NEXT: entry: // CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 -// CHECK-NEXT: store [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA4:![0-9]+]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA8:![0-9]+]] +// CHECK-NEXT: store [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10:![0-9]+]] // CHECK-NEXT: store <1 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 1 // CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-NEXT: ret [[TMP1]] @@ -111,7 +111,9 @@ fixed_bool32_t from_vbool32_t(vbool32_t type) { // CHECK-LABEL: @to_vbool32_t( // CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TYPE_COERCE:%.*]] +// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 +// CHECK-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]] +// CHECK-NEXT: ret [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]] // vbool32_t to_vbool32_t(fixed_bool32_t type) { return type; @@ -119,7 +121,7 @@ vbool32_t to_vbool32_t(fixed_bool32_t type) { // CHECK-LABEL: @to_vint32m1_t__from_gnu_int32m1_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA8]] +// CHECK-NEXT: 
[[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA10]] // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( poison, <8 x i32> [[TYPE]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -130,7 +132,7 @@ vint32m1_t to_vint32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) { // CHECK-LABEL: @from_vint32m1_t__to_gnu_int32m1_t( // CHECK-NEXT: entry: // CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE:%.*]], i64 0) -// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA8]] +// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA10]] // CHECK-NEXT: ret void // gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) { @@ -139,7 +141,7 @@ gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) { // CHECK-LABEL: @to_fixed_int32m1_t__from_gnu_int32m1_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA10]] // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( poison, <8 x i32> [[TYPE]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -150,7 +152,7 @@ fixed_int32m1_t to_fixed_int32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) { // CHECK-LABEL: @from_fixed_int32m1_t__to_gnu_int32m1_t( // CHECK-NEXT: entry: // CHECK-NEXT: [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE_COERCE:%.*]], i64 0) -// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA8]] +// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA10]] // CHECK-NEXT: ret void // gnu_int32m1_t from_fixed_int32m1_t__to_gnu_int32m1_t(fixed_int32m1_t type) { diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c index e1e2220f94d6..fcd4314249ff 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c @@ -62,10 +62,7 @@ fixed_bool_t from_svbool_t(svbool_t type) { // CHECK-LABEL: @lax_cast( // CHECK-NEXT: entry: -// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <16 x i32>, align 64 -// CHECK-NEXT: [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32( [[TYPE_COERCE:%.*]], i64 0) -// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[SAVED_VALUE]], align 64, !tbaa [[TBAA6:![0-9]+]] -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[SAVED_VALUE]], align 64, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[TYPE_COERCE:%.*]] to // CHECK-NEXT: ret [[TMP0]] // svint64_t lax_cast(fixed_int32_t type) { @@ -74,9 +71,9 @@ svint64_t lax_cast(fixed_int32_t type) { // CHECK-LABEL: @to_svint32_t__from_gnu_int32_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA6]] -// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv4i32.v16i32( poison, <16 x i32> [[TYPE]], i64 0) -// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv4i32.v16i32( poison, <16 x i32> [[TYPE]], i64 0) +// CHECK-NEXT: ret [[CAST_SCALABLE]] // svint32_t to_svint32_t__from_gnu_int32_t(gnu_int32_t type) { return type; @@ -84,8 +81,8 @@ svint32_t 
to_svint32_t__from_gnu_int32_t(gnu_int32_t type) {
 
 // CHECK-LABEL: @from_svint32_t__to_gnu_int32_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE:%.*]], i64 0)
-// CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT:    [[CAST_FIXED:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE:%.*]], i64 0)
+// CHECK-NEXT:    store <16 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
 gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) {
@@ -94,9 +91,9 @@ gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) {
 
 // CHECK-LABEL: @to_fixed_int32_t__from_gnu_int32_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
-// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+// CHECK-NEXT:    [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) {
   return type;
@@ -105,7 +102,7 @@ fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) {
 // CHECK-LABEL: @from_fixed_int32_t__to_gnu_int32_t(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    store <16 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT:    store <16 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
 gnu_int32_t from_fixed_int32_t__to_gnu_int32_t(fixed_int32_t type) {
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index 7612e553fe32..610f3cfca41c 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -275,6 +275,10 @@ class Attribute {
   /// when unknown.
   std::optional<unsigned> getVScaleRangeMax() const;
 
+  /// Return the value for vscale based on the vscale_range attribute or 0 when
+  /// unknown.
+  unsigned getVScaleValue() const;
+
   // Returns the unwind table kind.
   UWTableKind getUWTableKind() const;
 
diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
index b44f4f8c8687..15370cb0c95c 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -545,6 +545,22 @@ class VectorType : public Type {
     return VectorType::get(VTy->getElementType(), EltCnt * 2);
   }
 
+  /// This static method returns a VectorType with the same size-in-bits as
+  /// SizeTy but with an element type that matches the scalar type of EltTy.
+  static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) {
+    if (SizeTy->getScalarType() == EltTy->getScalarType())
+      return SizeTy;
+
+    unsigned EltSize = EltTy->getScalarSizeInBits();
+    if (!SizeTy->getPrimitiveSizeInBits().isKnownMultipleOf(EltSize))
+      return nullptr;
+
+    ElementCount EC = SizeTy->getElementCount()
+                          .multiplyCoefficientBy(SizeTy->getScalarSizeInBits())
+                          .divideCoefficientBy(EltSize);
+    return VectorType::get(EltTy->getScalarType(), EC);
+  }
+
   /// Return true if the specified type is valid as a element type.
   static bool isValidElementType(Type *ElemTy);
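(Editorial note, not part of the patch.) The new `VectorType::getWithSizeAndScalar` helper above is the type-level building block for the SROA changes later in this diff: given a "size" type and an element type, it produces the same-size-in-bits vector that keeps the element type, or nullptr when the sizes cannot line up. A minimal sketch of its intended use; `bridgeType` and the concrete types are illustrative only:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Sketch: for a scalable destination type and a fixed source type, build the
// intermediate "container" type SROA uses to bridge between the two.
static VectorType *bridgeType(LLVMContext &Ctx) {
  auto *Scalable = ScalableVectorType::get(Type::getInt64Ty(Ctx), 2); // <vscale x 2 x i64>
  auto *Fixed = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);       // <4 x i32>
  // Same size-in-bits as Scalable but with Fixed's scalar type, i.e.
  // <vscale x 4 x i32>; returns nullptr if the element size does not divide
  // the container's size.
  return VectorType::getWithSizeAndScalar(Scalable, Fixed);
}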
 
diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h
index 59cc489ade40..42a5ab4c58f9 100644
--- a/llvm/lib/IR/AttributeImpl.h
+++ b/llvm/lib/IR/AttributeImpl.h
@@ -343,6 +343,7 @@ class AttributeSetNode final
       const;
   unsigned getVScaleRangeMin() const;
   std::optional<unsigned> getVScaleRangeMax() const;
+  unsigned getVScaleValue() const;
   UWTableKind getUWTableKind() const;
   AllocFnKind getAllocKind() const;
   MemoryEffects getMemoryEffects() const;
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index ceb31856283c..7c3a554ba03f 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -469,6 +469,14 @@ std::optional<unsigned> Attribute::getVScaleRangeMax() const {
   return unpackVScaleRangeArgs(pImpl->getValueAsInt()).second;
 }
 
+unsigned Attribute::getVScaleValue() const {
+  std::optional<unsigned> VScale = getVScaleRangeMax();
+  if (VScale && *VScale == getVScaleRangeMin())
+    return *VScale;
+
+  return 0;
+}
+
 UWTableKind Attribute::getUWTableKind() const {
   assert(hasAttribute(Attribute::UWTable) &&
          "Trying to get unwind table kind from non-uwtable attribute");
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index e88c130cccf2..e88bf1ca4596 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1118,8 +1118,14 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
       return PI.setAborted(&LI);
 
     TypeSize Size = DL.getTypeStoreSize(LI.getType());
-    if (Size.isScalable())
-      return PI.setAborted(&LI);
+    if (Size.isScalable()) {
+      Attribute Attr = LI.getFunction()->getFnAttribute(Attribute::VScaleRange);
+      unsigned VScale = Attr.isValid() ? Attr.getVScaleValue() : 0;
+      if (!VScale)
+        return PI.setAborted(&LI);
+
+      Size = TypeSize::getFixed(Size.getKnownMinValue() * VScale);
+    }
 
     return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(),
                              LI.isVolatile());
@@ -1133,8 +1139,14 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
       return PI.setAborted(&SI);
 
     TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
-    if (StoreSize.isScalable())
-      return PI.setAborted(&SI);
+    if (StoreSize.isScalable()) {
+      Attribute Attr = SI.getFunction()->getFnAttribute(Attribute::VScaleRange);
+      unsigned VScale = Attr.isValid() ? Attr.getVScaleValue() : 0;
+      if (!VScale)
+        return PI.setAborted(&SI);
+
+      StoreSize = TypeSize::getFixed(StoreSize.getKnownMinValue() * VScale);
+    }
 
     uint64_t Size = StoreSize.getFixedValue();
 
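(Editorial note, not part of the patch.) Both SliceBuilder hunks above follow the same pattern: a scalable store size is only usable once the enclosing function's vscale_range attribute pins vscale to a single value, at which point the known-minimum size can be folded to a fixed byte count. A small standalone sketch of that computation; `sliceStoreSize` is an illustrative helper name, not something the patch adds:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: e.g. a <vscale x 4 x i32> load in a function with vscale_range(2,2)
// covers getKnownMinValue() * vscale = 16 * 2 = 32 bytes.
static uint64_t sliceStoreSize(const LoadInst &LI, const DataLayout &DL) {
  TypeSize Size = DL.getTypeStoreSize(LI.getType());
  if (!Size.isScalable())
    return Size.getFixedValue();

  Attribute Attr = LI.getFunction()->getFnAttribute(Attribute::VScaleRange);
  unsigned VScale = Attr.isValid() ? Attr.getVScaleValue() : 0;
  return VScale ? Size.getKnownMinValue() * VScale : 0; // 0: still unknown
}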
@@ -1925,7 +1937,8 @@ static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) {
 /// ensure that we only try to convert viable values. The strategy is that we
 /// will peel off single element struct and array wrappings to get to an
 /// underlying value, and convert that value.
-static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
+static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy,
+                            unsigned VScale = 0) {
   if (OldTy == NewTy)
     return true;
 
@@ -1939,8 +1952,24 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
     return false;
   }
 
-  if (DL.getTypeSizeInBits(NewTy).getFixedValue() !=
-      DL.getTypeSizeInBits(OldTy).getFixedValue())
+  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
+  TypeSize OldSize = DL.getTypeSizeInBits(OldTy);
+
+  if (isa<ScalableVectorType>(NewTy) && isa<FixedVectorType>(OldTy)) {
+    if (!VScale || NewTy->isPtrOrPtrVectorTy() || OldTy->isPtrOrPtrVectorTy() ||
+        !VectorType::getWithSizeAndScalar(cast<VectorType>(NewTy), OldTy))
+      return false;
+
+    NewSize = TypeSize::getFixed(NewSize.getKnownMinValue() * VScale);
+  } else if (isa<ScalableVectorType>(OldTy) && isa<FixedVectorType>(NewTy)) {
+    if (!VScale || NewTy->isPtrOrPtrVectorTy() || OldTy->isPtrOrPtrVectorTy() ||
+        !VectorType::getWithSizeAndScalar(cast<VectorType>(OldTy), NewTy))
+      return false;
+
+    OldSize = TypeSize::getFixed(OldSize.getKnownMinValue() * VScale);
+  }
+
+  if (NewSize != OldSize)
     return false;
   if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
     return false;
@@ -1990,7 +2019,15 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
 static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
                            Type *NewTy) {
   Type *OldTy = V->getType();
-  assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
+
+#ifndef NDEBUG
+  BasicBlock *BB = IRB.GetInsertBlock();
+  assert(BB && BB->getParent() && "VScale unknown!");
+  Attribute Attr = BB->getParent()->getFnAttribute(Attribute::VScaleRange);
+  unsigned VScale = Attr.isValid() ? Attr.getVScaleValue() : 0;
+  assert(canConvertValue(DL, OldTy, NewTy, VScale) &&
+         "Value not convertable to type");
+#endif
 
   if (OldTy == NewTy)
     return V;
@@ -2034,6 +2071,18 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
     }
   }
 
+  if (isa<ScalableVectorType>(NewTy) && isa<FixedVectorType>(OldTy)) {
+    auto *Ty = VectorType::getWithSizeAndScalar(cast<VectorType>(NewTy), OldTy);
+    V = IRB.CreateInsertVector(Ty, PoisonValue::get(Ty), V, IRB.getInt64(0));
+    return IRB.CreateBitCast(V, NewTy);
+  }
+
+  if (isa<FixedVectorType>(NewTy) && isa<ScalableVectorType>(OldTy)) {
+    auto *Ty = VectorType::getWithSizeAndScalar(cast<VectorType>(OldTy), NewTy);
+    V = IRB.CreateBitCast(V, Ty);
+    return IRB.CreateExtractVector(NewTy, V, IRB.getInt64(0));
+  }
+
   return IRB.CreateBitCast(V, NewTy);
 }
 
@@ -2044,7 +2093,8 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
 static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
                                             VectorType *Ty,
                                             uint64_t ElementSize,
-                                            const DataLayout &DL) {
+                                            const DataLayout &DL,
+                                            unsigned VScale) {
   // First validate the slice offsets.
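// --- Editorial aside, not part of the patch: the convertValue() additions
// above lower a fixed-width value into a scalable register in two steps.
// For example, with OldTy = <4 x i32>, NewTy = <vscale x 2 x i64> and a
// pinned vscale, the value is first inserted into the same-sized container
// <vscale x 4 x i32> (built by getWithSizeAndScalar) and then bitcast to
// NewTy. A standalone sketch of that step using plain IRBuilder<> (SROA
// itself uses its IRBuilderTy typedef); fixedToScalableSketch is an
// illustrative name only.
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *fixedToScalableSketch(IRBuilder<> &IRB, Value *V,
                                    VectorType *ScalableNewTy) {
  // Same-size scalable container that keeps V's element type.
  auto *Container =
      VectorType::getWithSizeAndScalar(ScalableNewTy, V->getType());
  Value *Tmp = IRB.CreateInsertVector(Container, PoisonValue::get(Container),
                                      V, IRB.getInt64(0));
  return IRB.CreateBitCast(Tmp, ScalableNewTy);
}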
uint64_t BeginOffset = std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset(); @@ -2088,7 +2138,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, assert(LTy->isIntegerTy()); LTy = SplitIntTy; } - if (!canConvertValue(DL, SliceTy, LTy)) + if (!canConvertValue(DL, SliceTy, LTy, VScale)) return false; } else if (StoreInst *SI = dyn_cast(U->getUser())) { if (SI->isVolatile()) @@ -2101,7 +2151,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, assert(STy->isIntegerTy()); STy = SplitIntTy; } - if (!canConvertValue(DL, STy, SliceTy)) + if (!canConvertValue(DL, STy, SliceTy, VScale)) return false; } else { return false; @@ -2116,7 +2166,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, /// (and thus isVectorPromotionViable) over all slices of the alloca for the /// given VectorType. static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy, - const DataLayout &DL) { + const DataLayout &DL, unsigned VScale) { uint64_t ElementSize = DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue(); @@ -2129,11 +2179,11 @@ static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy, ElementSize /= 8; for (const Slice &S : P) - if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL)) + if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL, VScale)) return false; for (const Slice *S : P.splitSliceTails()) - if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL)) + if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL, VScale)) return false; return true; @@ -2148,7 +2198,7 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, SmallVectorImpl &CandidateTys, bool HaveCommonEltTy, Type *CommonEltTy, bool HaveVecPtrTy, bool HaveCommonVecPtrTy, - VectorType *CommonVecPtrTy) { + VectorType *CommonVecPtrTy, unsigned VScale) { // If we didn't find a vector type, nothing to do here. if (CandidateTys.empty()) return nullptr; @@ -2224,7 +2274,7 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, }); for (VectorType *VTy : CandidateTys) - if (checkVectorTypeForPromotion(P, VTy, DL)) + if (checkVectorTypeForPromotion(P, VTy, DL, VScale)) return VTy; return nullptr; @@ -2235,7 +2285,7 @@ static VectorType *createAndCheckVectorTypesForPromotion( function_ref CheckCandidateType, Partition &P, const DataLayout &DL, SmallVectorImpl &CandidateTys, bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy, - bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) { + bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) { [[maybe_unused]] VectorType *OriginalElt = CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr; // Consider additional vector types where the element type size is a @@ -2260,9 +2310,9 @@ static VectorType *createAndCheckVectorTypesForPromotion( } } - return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy, - CommonEltTy, HaveVecPtrTy, - HaveCommonVecPtrTy, CommonVecPtrTy); + return checkVectorTypesForPromotion( + P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, + HaveCommonVecPtrTy, CommonVecPtrTy, VScale); } /// Test whether the given alloca partitioning and range of slices can be @@ -2274,7 +2324,8 @@ static VectorType *createAndCheckVectorTypesForPromotion( /// SSA value. We only can ensure this for a limited set of operations, and we /// don't want to do the rewrites unless we are confident that the result will /// be promotable, so we have an early test here. 
-static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { +static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL, + unsigned VScale) { // Collect the candidate types for vector-based promotion. Also track whether // we have different element types. SmallVector CandidateTys; @@ -2286,7 +2337,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { bool HaveCommonEltTy = true; bool HaveCommonVecPtrTy = true; auto CheckCandidateType = [&](Type *Ty) { - if (auto *VTy = dyn_cast(Ty)) { + if (auto *VTy = dyn_cast(Ty)) { // Return if bitcast to vectors is different for total size in bits. if (!CandidateTys.empty()) { VectorType *V = CandidateTys[0]; @@ -2341,14 +2392,14 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { if (auto *VTy = createAndCheckVectorTypesForPromotion( LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, - HaveCommonVecPtrTy, CommonVecPtrTy)) + HaveCommonVecPtrTy, CommonVecPtrTy, VScale)) return VTy; CandidateTys.clear(); return createAndCheckVectorTypesForPromotion( DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy, - CommonVecPtrTy); + CommonVecPtrTy, VScale); } /// Test whether a slice of an alloca is valid for integer widening. @@ -2385,7 +2436,8 @@ static bool isIntegerWideningViableForSlice(const Slice &S, if (LI->isVolatile()) return false; // We can't handle loads that extend past the allocated memory. - if (DL.getTypeStoreSize(LI->getType()).getFixedValue() > Size) + TypeSize LoadSize = DL.getTypeStoreSize(LI->getType()); + if (!LoadSize.isFixed() || LoadSize.getFixedValue() > Size) return false; // So far, AllocaSliceRewriter does not support widening split slice tails // in rewriteIntegerLoad. @@ -2410,7 +2462,8 @@ static bool isIntegerWideningViableForSlice(const Slice &S, if (SI->isVolatile()) return false; // We can't handle stores that extend past the allocated memory. - if (DL.getTypeStoreSize(ValueTy).getFixedValue() > Size) + TypeSize StoreSize = DL.getTypeStoreSize(ValueTy); + if (!StoreSize.isFixed() || StoreSize.getFixedValue() > Size) return false; // So far, AllocaSliceRewriter does not support widening split slice tails // in rewriteIntegerStore. @@ -2883,8 +2936,6 @@ class AllocaSliceRewriter : public InstVisitor { Type *TargetTy = IsSplit ? 
Type::getIntNTy(LI.getContext(), SliceSize * 8) : LI.getType(); - const bool IsLoadPastEnd = - DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize; bool IsPtrAdjusted = false; Value *V; if (VecTy) { @@ -2894,8 +2945,9 @@ class AllocaSliceRewriter : public InstVisitor { } else if (NewBeginOffset == NewAllocaBeginOffset && NewEndOffset == NewAllocaEndOffset && (canConvertValue(DL, NewAllocaTy, TargetTy) || - (IsLoadPastEnd && NewAllocaTy->isIntegerTy() && - TargetTy->isIntegerTy() && !LI.isVolatile()))) { + (NewAllocaTy->isIntegerTy() && TargetTy->isIntegerTy() && + DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize && + !LI.isVolatile()))) { Value *NewPtr = getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile()); LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr, @@ -3068,7 +3120,8 @@ class AllocaSliceRewriter : public InstVisitor { if (AllocaInst *AI = dyn_cast(V->stripInBoundsOffsets())) Pass.PostPromotionWorklist.insert(AI); - if (SliceSize < DL.getTypeStoreSize(V->getType()).getFixedValue()) { + TypeSize StoreSize = DL.getTypeStoreSize(V->getType()); + if (StoreSize.isFixed() && SliceSize < StoreSize.getFixedValue()) { assert(!SI.isVolatile()); assert(V->getType()->isIntegerTy() && "Only integer type loads and stores are split"); @@ -4844,14 +4897,19 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, Type *SliceTy = nullptr; VectorType *SliceVecTy = nullptr; const DataLayout &DL = AI.getDataLayout(); + Attribute Attr = AI.getFunction()->getFnAttribute(Attribute::VScaleRange); + unsigned VScale = Attr.isValid() ? Attr.getVScaleValue() : 0; + std::pair CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()); // Do all uses operate on the same type? - if (CommonUseTy.first) - if (DL.getTypeAllocSize(CommonUseTy.first).getFixedValue() >= P.size()) { + if (CommonUseTy.first) { + TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first); + if (CommonUseSize.isFixed() && CommonUseSize.getFixedValue() >= P.size()) { SliceTy = CommonUseTy.first; SliceVecTy = dyn_cast(SliceTy); } + } // If not, can we find an appropriate subtype in the original allocated type? if (!SliceTy) if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(), @@ -4872,12 +4930,12 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // If the common use types are not viable for promotion then attempt to find // another type that is viable. - if (SliceVecTy && !checkVectorTypeForPromotion(P, SliceVecTy, DL)) + if (SliceVecTy && !checkVectorTypeForPromotion(P, SliceVecTy, DL, VScale)) if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(), P.beginOffset(), P.size())) { VectorType *TypePartitionVecTy = dyn_cast(TypePartitionTy); if (TypePartitionVecTy && - checkVectorTypeForPromotion(P, TypePartitionVecTy, DL)) + checkVectorTypeForPromotion(P, TypePartitionVecTy, DL, VScale)) SliceTy = TypePartitionTy; } @@ -4888,7 +4946,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL); VectorType *VecTy = - IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL); + IsIntegerPromotable ? 
nullptr : isVectorPromotionViable(P, DL, VScale); if (VecTy) SliceTy = VecTy; diff --git a/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll new file mode 100644 index 000000000000..b4df64a4e45c --- /dev/null +++ b/llvm/test/Transforms/SROA/scalable-vectors-with-known-vscale.ll @@ -0,0 +1,248 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG +; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +; This test checks that SROA runs mem2reg on scalable vectors. + +define @alloca_nxv16i1( %pg) vscale_range(1) { +; CHECK-LABEL: @alloca_nxv16i1( +; CHECK-NEXT: ret [[PG:%.*]] +; + %pg.addr = alloca + store %pg, ptr %pg.addr + %1 = load , ptr %pg.addr + ret %1 +} + +define @alloca_nxv16i8( %vec) vscale_range(1) { +; CHECK-LABEL: @alloca_nxv16i8( +; CHECK-NEXT: ret [[VEC:%.*]] +; + %vec.addr = alloca + store %vec, ptr %vec.addr + %1 = load , ptr %vec.addr + ret %1 +} + +; Test scalable alloca that can't be promoted. Mem2Reg only considers +; non-volatile loads and stores for promotion. +define @unpromotable_alloca( %vec) vscale_range(1) { +; CHECK-LABEL: @unpromotable_alloca( +; CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca , align 16 +; CHECK-NEXT: store volatile [[VEC:%.*]], ptr [[VEC_ADDR]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load volatile , ptr [[VEC_ADDR]], align 16 +; CHECK-NEXT: ret [[TMP1]] +; + %vec.addr = alloca + store volatile %vec, ptr %vec.addr + %1 = load volatile , ptr %vec.addr + ret %1 +} + +; Test we bail out when using an alloca of a fixed-length vector (VLS) that was +; bitcasted to a scalable vector. +define @cast_alloca_to_svint32_t( %type.coerce) vscale_range(1) { +; CHECK-LABEL: @cast_alloca_to_svint32_t( +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32( [[TYPE_COERCE:%.*]], i64 0) +; CHECK-NEXT: [[TYPE_0_VEC_EXPAND:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TYPE_0_VECBLEND:%.*]] = select <16 x i1> , <16 x i32> [[TYPE_0_VEC_EXPAND]], <16 x i32> undef +; CHECK-NEXT: [[TYPE_ADDR_0_VEC_EXTRACT:%.*]] = shufflevector <16 x i32> [[TYPE_0_VECBLEND]], <16 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.vector.insert.nxv4i32.v4i32( poison, <4 x i32> [[TYPE_ADDR_0_VEC_EXTRACT]], i64 0) +; CHECK-NEXT: ret [[TMP2]] +; + %type = alloca <16 x i32> + %type.addr = alloca <16 x i32> + store %type.coerce, ptr %type + %type1 = load <16 x i32>, ptr %type + store <16 x i32> %type1, ptr %type.addr + %1 = load <16 x i32>, ptr %type.addr + %2 = load , ptr %type.addr + ret %2 +} + +; When casting from VLA to VLS via memory check we bail out when producing a +; GEP where the element type is a scalable vector. 
+define @cast_alloca_from_svint32_t() vscale_range(1) { +; CHECK-LABEL: @cast_alloca_from_svint32_t( +; CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +; CHECK-NEXT: store <16 x i32> undef, ptr [[RETVAL_COERCE]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 16 +; CHECK-NEXT: ret [[TMP1]] +; + %retval = alloca <16 x i32> + %retval.coerce = alloca + call void @llvm.memcpy.p0.p0.i64(ptr align 16 %retval.coerce, ptr align 16 %retval, i64 64, i1 false) + %1 = load , ptr %retval.coerce + ret %1 +} + +; Test we bail out when using an alloca of a fixed-length vector (VLS) that was +; bitcasted to a scalable vector. +define void @select_load_alloca_to_svdouble_t() vscale_range(1) { +; CHECK-LABEL: @select_load_alloca_to_svdouble_t( +; CHECK-NEXT: [[Z:%.*]] = alloca <16 x half>, align 32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], ptr [[Z]], ptr null +; CHECK-NEXT: [[VAL:%.*]] = load , ptr [[COND]], align 16 +; CHECK-NEXT: ret void +; + %z = alloca <16 x half> + %cmp = icmp eq i32 0, 0 + %cond = select i1 %cmp, ptr %z, ptr null + %val = load , ptr %cond, align 16 + ret void +} + +define void @select_store_alloca_to_svdouble_t( %val) vscale_range(1) { +; CHECK-LABEL: @select_store_alloca_to_svdouble_t( +; CHECK-NEXT: [[Z:%.*]] = alloca <16 x half>, align 32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], ptr [[Z]], ptr null +; CHECK-NEXT: store [[VAL:%.*]], ptr [[COND]], align 16 +; CHECK-NEXT: ret void +; + %z = alloca <16 x half> + %cmp = icmp eq i32 0, 0 + %cond = select i1 %cmp, ptr %z, ptr null + store %val, ptr %cond, align 16 + ret void +} + +define <4 x i32> @fixed_alloca_fixed_from_scalable( %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_fixed_from_scalable( +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32( [[A:%.*]], i64 0) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; + %tmp = alloca <4 x i32> + store %a, ptr %tmp + %cast = load <4 x i32>, ptr %tmp + ret <4 x i32> %cast +} + +define <2 x i8> @fixed_alloca_fixed_from_scalable_requires_bitcast( %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_requires_bitcast( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [[A:%.*]] to +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( [[TMP1]], i64 0) +; CHECK-NEXT: ret <2 x i8> [[TMP2]] +; + %tmp = alloca <2 x i8> + store %a, ptr %tmp + %cast = load <2 x i8>, ptr %tmp + ret <2 x i8> %cast +} + +define @fixed_alloca_scalable_from_fixed(<4 x i32> %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_scalable_from_fixed( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.vector.insert.nxv4i32.v4i32( poison, <4 x i32> [[A:%.*]], i64 0) +; CHECK-NEXT: ret [[TMP1]] +; + %tmp = alloca <4 x i32> + store <4 x i32> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define @fixed_alloca_scalable_from_fixed_requires_bitcast(<2 x i8> %a) vscale_range(1) { +; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_requires_bitcast( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.vector.insert.nxv2i8.v2i8( poison, <2 x i8> [[A:%.*]], i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast [[TMP1]] to +; CHECK-NEXT: ret [[TMP2]] +; + %tmp = alloca <2 x i8> + store <2 x i8> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define <4 x i32> @scalable_alloca_fixed_from_scalable( %a) vscale_range(1) { +; CHECK-LABEL: @scalable_alloca_fixed_from_scalable( +; CHECK-NEXT: [[TMP:%.*]] = alloca , align 16 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], 
align 16 +; CHECK-NEXT: [[CAST:%.*]] = load <4 x i32>, ptr [[TMP]], align 16 +; CHECK-NEXT: ret <4 x i32> [[CAST]] +; + %tmp = alloca + store %a, ptr %tmp + %cast = load <4 x i32>, ptr %tmp + ret <4 x i32> %cast +} + +define @scalable_alloca_scalable_from_fixed(<4 x i32> %a) vscale_range(1) { +; CHECK-LABEL: @scalable_alloca_scalable_from_fixed( +; CHECK-NEXT: [[TMP:%.*]] = alloca , align 16 +; CHECK-NEXT: store <4 x i32> [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[CAST:%.*]] = load , ptr [[TMP]], align 16 +; CHECK-NEXT: ret [[CAST]] +; + %tmp = alloca + store <4 x i32> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define i16 @scalar_alloca_scalar_from_scalable( %a) vscale_range(1) { +; CHECK-LABEL: @scalar_alloca_scalar_from_scalable( +; CHECK-NEXT: [[TMP:%.*]] = alloca i16, align 2 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 2 +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load i16, ptr [[TMP]], align 2 +; CHECK-NEXT: ret i16 [[TMP_0_CAST]] +; + %tmp = alloca i16 + store %a, ptr %tmp + %cast = load i16, ptr %tmp + ret i16 %cast +} + +define @scalar_alloca_scalable_from_scalar(i16 %a) vscale_range(1) { +; CHECK-LABEL: @scalar_alloca_scalable_from_scalar( +; CHECK-NEXT: [[TMP:%.*]] = alloca i16, align 2 +; CHECK-NEXT: store i16 [[A:%.*]], ptr [[TMP]], align 2 +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load , ptr [[TMP]], align 2 +; CHECK-NEXT: ret [[TMP_0_CAST]] +; + %tmp = alloca i16 + store i16 %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define { <2 x i32>, <2 x i32> } @fixed_struct_alloca_fixed_from_scalable( %a) vscale_range(1) { +; CHECK-LABEL: @fixed_struct_alloca_fixed_from_scalable( +; CHECK-NEXT: [[TMP:%.*]] = alloca { <2 x i32>, <2 x i32> }, align 8 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 8 +; CHECK-NEXT: [[TMP_0_CAST_FCA_0_LOAD:%.*]] = load <2 x i32>, ptr [[TMP]], align 8 +; CHECK-NEXT: [[CAST_FCA_0_INSERT:%.*]] = insertvalue { <2 x i32>, <2 x i32> } poison, <2 x i32> [[TMP_0_CAST_FCA_0_LOAD]], 0 +; CHECK-NEXT: [[TMP_8_CAST_FCA_1_GEP_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 8 +; CHECK-NEXT: [[TMP_8_CAST_FCA_1_LOAD:%.*]] = load <2 x i32>, ptr [[TMP_8_CAST_FCA_1_GEP_SROA_IDX]], align 8 +; CHECK-NEXT: [[CAST_FCA_1_INSERT:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[CAST_FCA_0_INSERT]], <2 x i32> [[TMP_8_CAST_FCA_1_LOAD]], 1 +; CHECK-NEXT: ret { <2 x i32>, <2 x i32> } [[CAST_FCA_1_INSERT]] +; + %tmp = alloca { <2 x i32>, <2 x i32> } + store %a, ptr %tmp + %cast = load { <2 x i32>, <2 x i32> }, ptr %tmp + ret { <2 x i32>, <2 x i32> } %cast +} + +define @fixed_struct_alloca_scalable_from_fixed({ <2 x ptr>, <2 x ptr> } %a) vscale_range(1) { +; CHECK-LABEL: @fixed_struct_alloca_scalable_from_fixed( +; CHECK-NEXT: [[TMP:%.*]] = alloca { <2 x ptr>, <2 x ptr> }, align 16 +; CHECK-NEXT: [[A_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[A:%.*]], 0 +; CHECK-NEXT: store <2 x ptr> [[A_FCA_0_EXTRACT]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[A_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[A]], 1 +; CHECK-NEXT: [[TMP_16_A_FCA_1_GEP_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 16 +; CHECK-NEXT: store <2 x ptr> [[A_FCA_1_EXTRACT]], ptr [[TMP_16_A_FCA_1_GEP_SROA_IDX]], align 16 +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load , ptr [[TMP]], align 16 +; CHECK-NEXT: ret [[TMP_0_CAST]] +; + %tmp = alloca { <2 x ptr>, <2 x ptr> } + store { <2 x ptr>, <2 x ptr> } %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind +;; NOTE: 
These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-MODIFY-CFG: {{.*}} +; CHECK-PRESERVE-CFG: {{.*}} diff --git a/llvm/test/Transforms/SROA/scalable-vectors.ll b/llvm/test/Transforms/SROA/scalable-vectors.ll index d892883ce9dc..9d6dec34b35b 100644 --- a/llvm/test/Transforms/SROA/scalable-vectors.ll +++ b/llvm/test/Transforms/SROA/scalable-vectors.ll @@ -2,6 +2,8 @@ ; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG ; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + ; This test checks that SROA runs mem2reg on scalable vectors. define @alloca_nxv16i1( %pg) { @@ -110,6 +112,146 @@ define void @select_store_alloca_to_svdouble_t( %val) { ret void } +define <4 x i32> @fixed_alloca_fixed_from_scalable( %a) { +; CHECK-LABEL: @fixed_alloca_fixed_from_scalable( +; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i32>, align 16 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[TMP]], align 16 +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; + %tmp = alloca <4 x i32> + store %a, ptr %tmp + %cast = load <4 x i32>, ptr %tmp + ret <4 x i32> %cast +} + +define <2 x i8> @fixed_alloca_fixed_from_scalable_requires_bitcast( %a) { +; CHECK-LABEL: @fixed_alloca_fixed_from_scalable_requires_bitcast( +; CHECK-NEXT: [[TMP:%.*]] = alloca <2 x i8>, align 2 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[TMP]], align 2 +; CHECK-NEXT: ret <2 x i8> [[TMP2]] +; + %tmp = alloca <2 x i8> + store %a, ptr %tmp + %cast = load <2 x i8>, ptr %tmp + ret <2 x i8> %cast +} + +define @fixed_alloca_scalable_from_fixed(<4 x i32> %a) { +; CHECK-LABEL: @fixed_alloca_scalable_from_fixed( +; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i32>, align 16 +; CHECK-NEXT: store <4 x i32> [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[TMP]], align 16 +; CHECK-NEXT: ret [[TMP1]] +; + %tmp = alloca <4 x i32> + store <4 x i32> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define @fixed_alloca_scalable_from_fixed_requires_bitcast(<2 x i8> %a) { +; CHECK-LABEL: @fixed_alloca_scalable_from_fixed_requires_bitcast( +; CHECK-NEXT: [[TMP:%.*]] = alloca <2 x i8>, align 2 +; CHECK-NEXT: store <2 x i8> [[A:%.*]], ptr [[TMP]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load , ptr [[TMP]], align 2 +; CHECK-NEXT: ret [[TMP2]] +; + %tmp = alloca <2 x i8> + store <2 x i8> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define <4 x i32> @scalable_alloca_fixed_from_scalable( %a) { +; CHECK-LABEL: @scalable_alloca_fixed_from_scalable( +; CHECK-NEXT: [[TMP:%.*]] = alloca , align 16 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[CAST:%.*]] = load <4 x i32>, ptr [[TMP]], align 16 +; CHECK-NEXT: ret <4 x i32> [[CAST]] +; + %tmp = alloca + store %a, ptr %tmp + %cast = load <4 x i32>, ptr %tmp + ret <4 x i32> %cast +} + +define @scalable_alloca_scalable_from_fixed(<4 x i32> %a) { +; CHECK-LABEL: @scalable_alloca_scalable_from_fixed( +; CHECK-NEXT: [[TMP:%.*]] = alloca , align 16 +; CHECK-NEXT: store <4 x i32> [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[CAST:%.*]] = load , ptr [[TMP]], align 16 +; CHECK-NEXT: ret [[CAST]] +; + %tmp = alloca + store <4 x i32> %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define i16 
@scalar_alloca_scalar_from_scalable( %a) { +; CHECK-LABEL: @scalar_alloca_scalar_from_scalable( +; CHECK-NEXT: [[TMP:%.*]] = alloca i16, align 2 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 2 +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load i16, ptr [[TMP]], align 2 +; CHECK-NEXT: ret i16 [[TMP_0_CAST]] +; + %tmp = alloca i16 + store %a, ptr %tmp + %cast = load i16, ptr %tmp + ret i16 %cast +} + +define @scalar_alloca_scalable_from_scalar(i16 %a) { +; CHECK-LABEL: @scalar_alloca_scalable_from_scalar( +; CHECK-NEXT: [[TMP:%.*]] = alloca i16, align 2 +; CHECK-NEXT: store i16 [[A:%.*]], ptr [[TMP]], align 2 +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load , ptr [[TMP]], align 2 +; CHECK-NEXT: ret [[TMP_0_CAST]] +; + %tmp = alloca i16 + store i16 %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + +define { <2 x i32>, <2 x i32> } @fixed_struct_alloca_fixed_from_scalable( %a) { +; CHECK-LABEL: @fixed_struct_alloca_fixed_from_scalable( +; CHECK-NEXT: [[TMP:%.*]] = alloca { <2 x i32>, <2 x i32> }, align 8 +; CHECK-NEXT: store [[A:%.*]], ptr [[TMP]], align 16 +; CHECK-NEXT: [[CAST_FCA_0_GEP:%.*]] = getelementptr inbounds { <2 x i32>, <2 x i32> }, ptr [[TMP]], i32 0, i32 0 +; CHECK-NEXT: [[TMP_0_CAST_FCA_0_LOAD:%.*]] = load <2 x i32>, ptr [[CAST_FCA_0_GEP]], align 8 +; CHECK-NEXT: [[CAST_FCA_0_INSERT:%.*]] = insertvalue { <2 x i32>, <2 x i32> } poison, <2 x i32> [[TMP_0_CAST_FCA_0_LOAD]], 0 +; CHECK-NEXT: [[TMP_8_CAST_FCA_1_GEP_SROA_IDX:%.*]] = getelementptr inbounds { <2 x i32>, <2 x i32> }, ptr [[TMP]], i32 0, i32 1 +; CHECK-NEXT: [[TMP_8_CAST_FCA_1_LOAD:%.*]] = load <2 x i32>, ptr [[TMP_8_CAST_FCA_1_GEP_SROA_IDX]], align 8 +; CHECK-NEXT: [[CAST_FCA_1_INSERT:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[CAST_FCA_0_INSERT]], <2 x i32> [[TMP_8_CAST_FCA_1_LOAD]], 1 +; CHECK-NEXT: ret { <2 x i32>, <2 x i32> } [[CAST_FCA_1_INSERT]] +; + %tmp = alloca { <2 x i32>, <2 x i32> } + store %a, ptr %tmp + %cast = load { <2 x i32>, <2 x i32> }, ptr %tmp + ret { <2 x i32>, <2 x i32> } %cast +} + +define @fixed_struct_alloca_scalable_from_fixed({ <2 x ptr>, <2 x ptr> } %a) { +; CHECK-LABEL: @fixed_struct_alloca_scalable_from_fixed( +; CHECK-NEXT: [[TMP:%.*]] = alloca { <2 x ptr>, <2 x ptr> }, align 16 +; CHECK-NEXT: [[A_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[A:%.*]], 0 +; CHECK-NEXT: [[A_FCA_0_GEP:%.*]] = getelementptr inbounds { <2 x ptr>, <2 x ptr> }, ptr [[TMP]], i32 0, i32 0 +; CHECK-NEXT: store <2 x ptr> [[A_FCA_0_EXTRACT]], ptr [[A_FCA_0_GEP]], align 16 +; CHECK-NEXT: [[A_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[A]], 1 +; CHECK-NEXT: [[TMP_16_A_FCA_1_GEP_SROA_IDX:%.*]] = getelementptr inbounds { <2 x ptr>, <2 x ptr> }, ptr [[TMP]], i32 0, i32 1 +; CHECK-NEXT: store <2 x ptr> [[A_FCA_1_EXTRACT]], ptr [[TMP_16_A_FCA_1_GEP_SROA_IDX]], align 16 +; CHECK-NEXT: [[TMP_0_CAST:%.*]] = load , ptr [[TMP]], align 32 +; CHECK-NEXT: ret [[TMP_0_CAST]] +; + %tmp = alloca { <2 x ptr>, <2 x ptr> } + store { <2 x ptr>, <2 x ptr> } %a, ptr %tmp + %cast = load , ptr %tmp + ret %cast +} + declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK-MODIFY-CFG: {{.*}}
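(Editorial note, not part of the patch.) The two SROA test files differ only in whether the functions carry a vscale_range attribute: with vscale_range(1) the mixed fixed/scalable allocas above are promoted to llvm.vector.insert/llvm.vector.extract (plus a bitcast where the element types differ), while without it SROA still bails out and keeps the alloca, store, and load. For reference, a small sketch of how a pinned vscale is attached and read back through the C++ API; `pinVScale` is an illustrative name, not something the patch adds:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Attach vscale_range(N, N) to F and query it the way the SROA changes do.
static unsigned pinVScale(Function &F, unsigned N) {
  F.addFnAttr(Attribute::getWithVScaleRangeArgs(F.getContext(), N, N));
  Attribute Attr = F.getFnAttribute(Attribute::VScaleRange);
  return Attr.isValid() ? Attr.getVScaleValue() : 0; // returns N here
}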