Commit e86986f
[CodeGen][RISCV] Use vscale_range to handle more fixed<->scalable casts of i1 vectors.
RISC-V with -mrvv-vector-bits-min supports giving a size to our scalable vector types. To do this, we represent the vector as a fixed vector in memory and need to cast back and forth to scalable vectors. For i1 vectors, we use an i8 vector in memory. If there are fewer than 8 bits, we use a <1 x i8> vector with some undefined bits.

The cast code previously fell back to copying through memory if the known minimum size of the scalable i1 type was not divisible by 8. This used a <vscale x X x i1> load or store from a fixed vector alloca. If X is less than 8, DataLayout indicates that the load/store reads/writes vscale bytes even if vscale is known and vscale*X is less than or equal to 8. This means the load or store is outside the bounds of the fixed-size alloca as far as DataLayout is concerned, leading to undefined behavior.

This patch makes use of the known value of vscale_range to avoid casting through memory. Hopefully this allows llvm#130973 to proceed.
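
To make the fix concrete, here is a minimal sketch of the IR shape for the fixed-to-scalable direction, assuming a vbool32_t (<vscale x 2 x i1>) built with -mrvv-vector-bits-min=64 on RV64, so vscale is known to be exactly 1 and the in-memory form is <1 x i8>; the value names are illustrative, not taken from the patch:

  ; Before: round-trip through a fixed-size stack slot. DataLayout treats the
  ; scalable load as reading vscale bytes, which it cannot prove stays within
  ; the 1-byte alloca, even though vscale is known here.
  %coerce = alloca <1 x i8>, align 1
  store <1 x i8> %bytes, ptr %coerce, align 1
  %mask.mem = load <vscale x 2 x i1>, ptr %coerce, align 1

  ; After: stay in SSA values. The <1 x i8> carries 8 i1 bits; with vscale == 1
  ; the scalable type holds exactly 2 of them, so extract 2 bits and insert
  ; them into a poison scalable vector.
  %wide = bitcast <1 x i8> %bytes to <8 x i1>
  %bits = call <2 x i1> @llvm.vector.extract.v2i1.v8i1(<8 x i1> %wide, i64 0)
  %mask.reg = call <vscale x 2 x i1> @llvm.vector.insert.nxv2i1.v2i1(<vscale x 2 x i1> poison, <2 x i1> %bits, i64 0)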
1 parent 79210fe commit e86986f

8 files changed: +179 -189 lines

clang/lib/CodeGen/CGCall.cpp

Lines changed: 55 additions & 0 deletions
@@ -1378,6 +1378,35 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
         Result = CGF.Builder.CreateBitCast(Result, Ty);
         return Result;
       }
+
+      // If we are casting a fixed i8 vector to a scalable i1 predicate
+      // vector, and we weren't able to handle it above, try using what we know
+      // about vscale to insert a fixed i1 vector into the scalable vector.
+      if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
+          FixedSrcTy->getElementType()->isIntegerTy(8)) {
+        std::optional<std::pair<unsigned, unsigned>> VScaleRange =
+            CGF.getContext().getTargetInfo().getVScaleRange(CGF.getLangOpts(),
+                                                            false);
+        if (VScaleRange && VScaleRange->first == VScaleRange->second &&
+            VScaleRange->first <= FixedSrcTy->getNumElements() * 8) {
+          llvm::Value *Load = CGF.Builder.CreateLoad(Src);
+          unsigned VScale = VScaleRange->first;
+          llvm::Type *WideFixedTy =
+              llvm::FixedVectorType::get(ScalableDstTy->getElementType(),
+                                         FixedSrcTy->getNumElements() * 8);
+          Load = CGF.Builder.CreateBitCast(Load, WideFixedTy);
+          llvm::Type *FixedTy = llvm::FixedVectorType::get(
+              ScalableDstTy->getElementType(),
+              ScalableDstTy->getElementCount().getKnownMinValue() * VScale);
+          // If the fixed i8 vector is larger than the i1 vector, we need to
+          // extract.
+          if (FixedTy != WideFixedTy)
+            Load = CGF.Builder.CreateExtractVector(FixedTy, Load, uint64_t(0));
+          return CGF.Builder.CreateInsertVector(
+              ScalableDstTy, llvm::PoisonValue::get(ScalableDstTy), Load,
+              uint64_t(0));
+        }
+      }
     }
   }

@@ -1485,6 +1514,32 @@ CoerceScalableToFixed(CodeGenFunction &CGF, llvm::FixedVectorType *ToTy,
    V = CGF.Builder.CreateExtractVector(ToTy, V, uint64_t(0), "cast.fixed");
    return {V, true};
  }
+
+  // If we are casting a scalable i1 predicate vector to a fixed i8
+  // vector, and we weren't able to handle it above, try using what we know
+  // about vscale to extract a fixed i1 vector from the scalable vector.
+  if (FromTy->getElementType()->isIntegerTy(1) &&
+      ToTy->getElementType() == CGF.Builder.getInt8Ty()) {
+    std::optional<std::pair<unsigned, unsigned>> VScaleRange =
+        CGF.getContext().getTargetInfo().getVScaleRange(CGF.getLangOpts(),
+                                                        false);
+    if (VScaleRange && VScaleRange->first == VScaleRange->second &&
+        VScaleRange->first <= ToTy->getNumElements() * 8) {
+      unsigned VScale = VScaleRange->first;
+      llvm::Type *FixedTy = llvm::FixedVectorType::get(
+          FromTy->getElementType(),
+          FromTy->getElementCount().getKnownMinValue() * VScale);
+      V = CGF.Builder.CreateExtractVector(FixedTy, V, uint64_t(0));
+      llvm::Type *WideFixedTy = llvm::FixedVectorType::get(
+          FromTy->getElementType(), ToTy->getNumElements() * 8);
+      if (FixedTy != WideFixedTy)
+        V = CGF.Builder.CreateInsertVector(
+            WideFixedTy, llvm::PoisonValue::get(WideFixedTy), V, uint64_t(0));
+      V = CGF.Builder.CreateBitCast(V, ToTy);
+      return {V, true};
+    }
+  }
+
  return {V, false};
}
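
For the scalable-to-fixed direction handled above, a sketch under the same reasoning but assuming -mrvv-vector-bits-min=128 (so vscale == 2) and a vbool64_t value (<vscale x 1 x i1>) coerced to its <1 x i8> memory form; names are illustrative:

  ; FixedTy = <2 x i1> (known-min 1 x vscale 2); WideFixedTy = <8 x i1>.
  %bits = call <2 x i1> @llvm.vector.extract.v2i1.nxv1i1(<vscale x 1 x i1> %mask, i64 0)
  %wide = call <8 x i1> @llvm.vector.insert.v8i1.v2i1(<8 x i1> poison, <2 x i1> %bits, i64 0)
  %bytes = bitcast <8 x i1> %wide to <1 x i8>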

clang/lib/CodeGen/CGExprScalar.cpp

Lines changed: 58 additions & 0 deletions
@@ -2493,6 +2493,35 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
          Result = Builder.CreateBitCast(Result, DstTy);
          return Result;
        }
+
+        // If we are casting a fixed i8 vector to a scalable i1 predicate
+        // vector, and we weren't able to handle it above, try using what we
+        // know about vscale to insert a fixed i1 vector into the scalable
+        // vector.
+        if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
+            FixedSrcTy->getElementType()->isIntegerTy(8)) {
+          std::optional<std::pair<unsigned, unsigned>> VScaleRange =
+              CGF.getContext().getTargetInfo().getVScaleRange(CGF.getLangOpts(),
+                                                              false);
+          if (VScaleRange && VScaleRange->first == VScaleRange->second &&
+              VScaleRange->first <= FixedSrcTy->getNumElements() * 8) {
+            unsigned VScale = VScaleRange->first;
+            llvm::Type *WideFixedTy =
+                llvm::FixedVectorType::get(ScalableDstTy->getElementType(),
+                                           FixedSrcTy->getNumElements() * 8);
+            Src = Builder.CreateBitCast(Src, WideFixedTy);
+            llvm::Type *FixedTy = llvm::FixedVectorType::get(
+                ScalableDstTy->getElementType(),
+                ScalableDstTy->getElementCount().getKnownMinValue() * VScale);
+            // If the fixed i8 vector is larger than the i1 vector, we need to
+            // extract.
+            if (FixedTy != WideFixedTy)
+              Src = Builder.CreateExtractVector(FixedTy, Src, uint64_t(0));
+            return Builder.CreateInsertVector(
+                ScalableDstTy, llvm::PoisonValue::get(ScalableDstTy), Src,
+                uint64_t(0));
+          }
+        }
      }
    }

@@ -2514,6 +2543,35 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
        if (ScalableSrcTy->getElementType() == FixedDstTy->getElementType())
          return Builder.CreateExtractVector(DstTy, Src, uint64_t(0),
                                             "cast.fixed");
+
+        // If we are casting a scalable i1 predicate vector to a fixed i8
+        // vector, and we weren't able to handle it above, try using what we
+        // know about vscale to extract a fixed i1 vector from the scalable
+        // vector.
+        if (ScalableSrcTy->getElementType()->isIntegerTy(1) &&
+            FixedDstTy->getElementType()->isIntegerTy(8)) {
+          std::optional<std::pair<unsigned, unsigned>> VScaleRange =
+              CGF.getContext().getTargetInfo().getVScaleRange(CGF.getLangOpts(),
+                                                              false);
+          if (VScaleRange && VScaleRange->first == VScaleRange->second &&
+              VScaleRange->first <= FixedDstTy->getNumElements() * 8) {
+            unsigned VScale = VScaleRange->first;
+            llvm::Type *FixedTy = llvm::FixedVectorType::get(
+                ScalableSrcTy->getElementType(),
+                ScalableSrcTy->getElementCount().getKnownMinValue() * VScale);
+            Src = Builder.CreateExtractVector(FixedTy, Src, uint64_t(0));
+            llvm::Type *WideFixedTy =
+                llvm::FixedVectorType::get(ScalableSrcTy->getElementType(),
+                                           FixedDstTy->getNumElements() * 8);
+            // If the fixed i8 vector is larger than the i1 vector, we need to
+            // widen the i1 vector.
+            if (FixedTy != WideFixedTy)
+              Src = Builder.CreateInsertVector(
+                  WideFixedTy, llvm::PoisonValue::get(WideFixedTy), Src,
+                  uint64_t(0));
+            return Builder.CreateBitCast(Src, FixedDstTy);
+          }
+        }
      }
    }
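
When FixedTy and WideFixedTy turn out to be the same type, the conditional extract/widen steps above are skipped. A sketch, assuming a vbool16_t (<vscale x 4 x i1>) at -mrvv-vector-bits-min=128 (vscale == 2), where the 8 bits of the <1 x i8> source exactly fill the predicate; names are illustrative:

  ; FixedTy = <8 x i1> (known-min 4 x vscale 2) == WideFixedTy, so bitcast
  ; and insert directly; no extract is needed.
  %wide = bitcast <1 x i8> %src to <8 x i1>
  %mask = call <vscale x 4 x i1> @llvm.vector.insert.nxv4i1.v8i1(<vscale x 4 x i1> poison, <8 x i1> %wide, i64 0)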

clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c

Lines changed: 16 additions & 88 deletions
@@ -15,24 +15,12 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_
 
 // CHECK-64-LABEL: @call_bool32_ff(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE4:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-64-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-64-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2_COERCE:%.*]], i64 2)
-// CHECK-64-NEXT:    store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-64-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]]
-// CHECK-64-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
+// CHECK-64-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[TMP1:%.*]], i64 2)
 // CHECK-64-NEXT:    ret <vscale x 2 x i1> [[TMP2]]
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE4:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-128-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-128-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2_COERCE:%.*]], i64 4)
-// CHECK-128-NEXT:    store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-128-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]]
-// CHECK-128-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
+// CHECK-128-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[TMP1:%.*]], i64 4)
 // CHECK-128-NEXT:    ret <vscale x 2 x i1> [[TMP2]]
 //
 fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) {
@@ -41,24 +29,12 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) {
 
 // CHECK-64-LABEL: @call_bool64_ff(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE4:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-64-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-64-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2_COERCE:%.*]], i64 1)
-// CHECK-64-NEXT:    store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-64-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]]
-// CHECK-64-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
+// CHECK-64-NEXT:    [[TMP2:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[TMP1:%.*]], i64 1)
 // CHECK-64-NEXT:    ret <vscale x 1 x i1> [[TMP2]]
 //
 // CHECK-128-LABEL: @call_bool64_ff(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE4:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-128-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-128-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2_COERCE:%.*]], i64 2)
-// CHECK-128-NEXT:    store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-128-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
+// CHECK-128-NEXT:    [[TMP2:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[TMP1:%.*]], i64 2)
 // CHECK-128-NEXT:    ret <vscale x 1 x i1> [[TMP2]]
 //
 fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, fixed_bool64_t op2) {
@@ -71,51 +47,27 @@ fixed_bool64_t call_bool64_fs(fixed_bool64_t op1, vbool64_t op2) {
 
 // CHECK-64-LABEL: @call_bool32_fs(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE2:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-64-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-64-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 2)
-// CHECK-64-NEXT:    store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
-// CHECK-64-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
-// CHECK-64-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    ret <vscale x 2 x i1> [[TMP2]]
+// CHECK-64-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 2)
+// CHECK-64-NEXT:    ret <vscale x 2 x i1> [[TMP1]]
 //
 // CHECK-128-LABEL: @call_bool32_fs(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE2:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-128-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-128-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
-// CHECK-128-NEXT:    store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    ret <vscale x 2 x i1> [[TMP2]]
+// CHECK-128-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
+// CHECK-128-NEXT:    ret <vscale x 2 x i1> [[TMP1]]
 //
 fixed_bool32_t call_bool32_fs(fixed_bool32_t op1, vbool32_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 32);
 }
 
 // CHECK-64-LABEL: @call_bool64_fs(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE2:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-64-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-64-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 1)
-// CHECK-64-NEXT:    store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA11]]
-// CHECK-64-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
-// CHECK-64-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    ret <vscale x 1 x i1> [[TMP2]]
+// CHECK-64-NEXT:    [[TMP1:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 1)
+// CHECK-64-NEXT:    ret <vscale x 1 x i1> [[TMP1]]
 //
 // CHECK-128-LABEL: @call_bool64_fs(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE2:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-128-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-128-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
-// CHECK-128-NEXT:    store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA11]]
-// CHECK-128-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    ret <vscale x 1 x i1> [[TMP2]]
+// CHECK-128-NEXT:    [[TMP1:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
+// CHECK-128-NEXT:    ret <vscale x 1 x i1> [[TMP1]]
 //
 fixed_bool64_t call_bool64_fs(fixed_bool64_t op1, vbool64_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 64);
@@ -127,51 +79,27 @@ fixed_bool64_t call_bool64_fs(fixed_bool64_t op1, vbool64_t op2) {
 
 // CHECK-64-LABEL: @call_bool32_ss(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-64-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
 // CHECK-64-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 2)
-// CHECK-64-NEXT:    store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
-// CHECK-64-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
-// CHECK-64-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    ret <vscale x 2 x i1> [[TMP2]]
+// CHECK-64-NEXT:    ret <vscale x 2 x i1> [[TMP0]]
 //
 // CHECK-128-LABEL: @call_bool32_ss(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-128-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
 // CHECK-128-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
-// CHECK-128-NEXT:    store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    ret <vscale x 2 x i1> [[TMP2]]
+// CHECK-128-NEXT:    ret <vscale x 2 x i1> [[TMP0]]
 //
 fixed_bool32_t call_bool32_ss(vbool32_t op1, vbool32_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 32);
 }
 
 // CHECK-64-LABEL: @call_bool64_ss(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-64-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
 // CHECK-64-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 1)
-// CHECK-64-NEXT:    store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA11]]
-// CHECK-64-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
-// CHECK-64-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    ret <vscale x 1 x i1> [[TMP2]]
+// CHECK-64-NEXT:    ret <vscale x 1 x i1> [[TMP0]]
 //
 // CHECK-128-LABEL: @call_bool64_ss(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-128-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
 // CHECK-128-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
-// CHECK-128-NEXT:    store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA11]]
-// CHECK-128-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    ret <vscale x 1 x i1> [[TMP2]]
+// CHECK-128-NEXT:    ret <vscale x 1 x i1> [[TMP0]]
 //
 fixed_bool64_t call_bool64_ss(vbool64_t op1, vbool64_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 64);
