Skip to content

Commit 62ac4e3

Browse files
authored
[Clang] Support generic bit counting builtins on fixed boolean vectors (#154203)
Summary: Boolean vectors as implemented in Clang can be bit-cast to an integer whose width is rounded up to the next primitive integer size. Users can do this conversion themselves, but since the bit-counting builtins are very likely to be used with bitmasks like this, and the generic forms are expected to be generic, it seems reasonable to handle this case directly.
1 parent 9c410dd commit 62ac4e3

File tree

8 files changed

+349
-208
lines changed

8 files changed

+349
-208
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4182,7 +4182,7 @@ builtin, the mangler emits their usual pattern without any special treatment.
41824182
-----------------------
41834183
41844184
``__builtin_popcountg`` returns the number of 1 bits in the argument. The
4185-
argument can be of any unsigned integer type.
4185+
argument can be of any unsigned integer type or fixed boolean vector.
41864186
41874187
**Syntax**:
41884188
@@ -4214,7 +4214,13 @@ such as ``unsigned __int128`` and C23 ``unsigned _BitInt(N)``.
42144214
42154215
``__builtin_clzg`` (respectively ``__builtin_ctzg``) returns the number of
42164216
leading (respectively trailing) 0 bits in the first argument. The first argument
4217-
can be of any unsigned integer type.
4217+
can be of any unsigned integer type or fixed boolean vector.
4218+
4219+
For boolean vectors, these builtins interpret the vector like a bit-field where
4220+
the ith element of the vector is bit i of the bit-field, counting from the
4221+
least significant end. ``__builtin_clzg`` returns the number of consecutive
4222+
zero elements at the end of the vector, while ``__builtin_ctzg`` returns the
4223+
number of consecutive zero elements at the start of the vector.
42184224
42194225
If the first argument is 0 and an optional second argument of ``int`` type is
42204226
provided, then the second argument is returned. If the first argument is 0, but

clang/docs/ReleaseNotes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,9 @@ Non-comprehensive list of changes in this release
159159
- Added ``__builtin_masked_load`` and ``__builtin_masked_store`` for conditional
160160
memory loads from vectors. Binds to the LLVM intrinsic of the same name.
161161

162+
- The ``__builtin_popcountg``, ``__builtin_ctzg``, and ``__builtin_clzg``
163+
functions now accept fixed-size boolean vectors.
164+
162165
- Use of ``__has_feature`` to detect the ``ptrauth_qualifier`` and ``ptrauth_intrinsics``
163166
features has been deprecated, and is restricted to the arm64e target only. The
164167
correct method to check for these features is to test for the ``__PTRAUTH__``

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,22 @@ static void diagnoseNonConstexprBuiltin(InterpState &S, CodePtr OpPC,
141141
S.CCEDiag(Loc, diag::note_invalid_subexpr_in_const_expr);
142142
}
143143

144+
/// Packs a boolean vector into an integer bitmask.
///
/// \param Val Pointer to a primitive array whose element type is boolean
///            (checked by the assertion below).
/// \returns An unsigned integer that is exactly as wide as the vector has
///          elements, where bit I is set if and only if element I is true.
static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) {
  assert(Val.getFieldDesc()->isPrimitiveArray() &&
         Val.getFieldDesc()->getElemQualType()->isBooleanType() &&
         "Not a boolean vector");
  unsigned NumElems = Val.getNumElems();

  // Each element is one bit, so create an integer with NumElems bits. The
  // second constructor argument of APSInt is the signedness flag, not an
  // initial value; a bitmask is unsigned, and the value starts out as zero.
  llvm::APSInt Result(NumElems, /*isUnsigned=*/true);
  for (unsigned I = 0; I != NumElems; ++I) {
    if (Val.elem<bool>(I))
      Result.setBit(I);
  }

  return Result;
}
159+
144160
static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
145161
const InterpFrame *Frame,
146162
const CallExpr *Call) {
@@ -643,8 +659,14 @@ static bool interp__builtin_abs(InterpState &S, CodePtr OpPC,
643659
static bool interp__builtin_popcount(InterpState &S, CodePtr OpPC,
644660
const InterpFrame *Frame,
645661
const CallExpr *Call) {
646-
PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType());
647-
APSInt Val = popToAPSInt(S.Stk, ArgT);
662+
APSInt Val;
663+
if (Call->getArg(0)->getType()->isExtVectorBoolType()) {
664+
const Pointer &Arg = S.Stk.pop<Pointer>();
665+
Val = convertBoolVectorToInt(Arg);
666+
} else {
667+
PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType());
668+
Val = popToAPSInt(S.Stk, ArgT);
669+
}
648670
pushInteger(S, Val.popcount(), Call->getType());
649671
return true;
650672
}
@@ -940,8 +962,14 @@ static bool interp__builtin_clz(InterpState &S, CodePtr OpPC,
940962
PrimType FallbackT = *S.getContext().classify(Call->getArg(1));
941963
Fallback = popToAPSInt(S.Stk, FallbackT);
942964
}
943-
PrimType ValT = *S.getContext().classify(Call->getArg(0));
944-
const APSInt &Val = popToAPSInt(S.Stk, ValT);
965+
APSInt Val;
966+
if (Call->getArg(0)->getType()->isExtVectorBoolType()) {
967+
const Pointer &Arg = S.Stk.pop<Pointer>();
968+
Val = convertBoolVectorToInt(Arg);
969+
} else {
970+
PrimType ValT = *S.getContext().classify(Call->getArg(0));
971+
Val = popToAPSInt(S.Stk, ValT);
972+
}
945973

946974
// When the argument is 0, the result of GCC builtins is undefined, whereas
947975
// for Microsoft intrinsics, the result is the bit-width of the argument.
@@ -971,8 +999,14 @@ static bool interp__builtin_ctz(InterpState &S, CodePtr OpPC,
971999
PrimType FallbackT = *S.getContext().classify(Call->getArg(1));
9721000
Fallback = popToAPSInt(S.Stk, FallbackT);
9731001
}
974-
PrimType ValT = *S.getContext().classify(Call->getArg(0));
975-
const APSInt &Val = popToAPSInt(S.Stk, ValT);
1002+
APSInt Val;
1003+
if (Call->getArg(0)->getType()->isExtVectorBoolType()) {
1004+
const Pointer &Arg = S.Stk.pop<Pointer>();
1005+
Val = convertBoolVectorToInt(Arg);
1006+
} else {
1007+
PrimType ValT = *S.getContext().classify(Call->getArg(0));
1008+
Val = popToAPSInt(S.Stk, ValT);
1009+
}
9761010

9771011
if (Val == 0) {
9781012
if (Fallback) {

clang/lib/AST/ExprConstant.cpp

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11255,6 +11255,24 @@ static bool EvaluateVector(const Expr* E, APValue& Result, EvalInfo &Info) {
1125511255
return VectorExprEvaluator(Info, Result).Visit(E);
1125611256
}
1125711257

11258+
/// Packs a boolean vector constant into an integer bitmask.
///
/// The result has one bit per vector element; bit I is set exactly when
/// element I of \p Val holds a non-zero integer.
static llvm::APInt ConvertBoolVectorToInt(const APValue &Val) {
  assert(Val.isVector() && "expected vector APValue");

  // One bit per lane: the mask is exactly as wide as the vector is long.
  const unsigned Width = Val.getVectorLength();
  llvm::APInt Mask(Width, 0);

  for (unsigned Idx = 0; Idx != Width; ++Idx) {
    const APValue &Lane = Val.getVectorElt(Idx);
    assert(Lane.isInt() && "expected integer element in bool vector");

    // False lanes leave their bit cleared.
    if (!Lane.getInt().getBoolValue())
      continue;
    Mask.setBit(Idx);
  }

  return Mask;
}
11275+
1125811276
bool VectorExprEvaluator::VisitCastExpr(const CastExpr *E) {
1125911277
const VectorType *VTy = E->getType()->castAs<VectorType>();
1126011278
unsigned NElts = VTy->getNumElements();
@@ -13442,8 +13460,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1344213460
case Builtin::BI__lzcnt:
1344313461
case Builtin::BI__lzcnt64: {
1344413462
APSInt Val;
13445-
if (!EvaluateInteger(E->getArg(0), Val, Info))
13463+
if (E->getArg(0)->getType()->isExtVectorBoolType()) {
13464+
APValue Vec;
13465+
if (!EvaluateVector(E->getArg(0), Vec, Info))
13466+
return false;
13467+
Val = ConvertBoolVectorToInt(Vec);
13468+
} else if (!EvaluateInteger(E->getArg(0), Val, Info)) {
1344613469
return false;
13470+
}
1344713471

1344813472
std::optional<APSInt> Fallback;
1344913473
if ((BuiltinOp == Builtin::BI__builtin_clzg ||
@@ -13528,8 +13552,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1352813552
case Builtin::BI__builtin_ctzg:
1352913553
case Builtin::BI__builtin_elementwise_cttz: {
1353013554
APSInt Val;
13531-
if (!EvaluateInteger(E->getArg(0), Val, Info))
13555+
if (E->getArg(0)->getType()->isExtVectorBoolType()) {
13556+
APValue Vec;
13557+
if (!EvaluateVector(E->getArg(0), Vec, Info))
13558+
return false;
13559+
Val = ConvertBoolVectorToInt(Vec);
13560+
} else if (!EvaluateInteger(E->getArg(0), Val, Info)) {
1353213561
return false;
13562+
}
1353313563

1353413564
std::optional<APSInt> Fallback;
1353513565
if ((BuiltinOp == Builtin::BI__builtin_ctzg ||
@@ -13744,8 +13774,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1374413774
case Builtin::BI__popcnt:
1374513775
case Builtin::BI__popcnt64: {
1374613776
APSInt Val;
13747-
if (!EvaluateInteger(E->getArg(0), Val, Info))
13777+
if (E->getArg(0)->getType()->isExtVectorBoolType()) {
13778+
APValue Vec;
13779+
if (!EvaluateVector(E->getArg(0), Vec, Info))
13780+
return false;
13781+
Val = ConvertBoolVectorToInt(Vec);
13782+
} else if (!EvaluateInteger(E->getArg(0), Val, Info)) {
1374813783
return false;
13784+
}
1374913785

1375013786
return Success(Val.popcount(), E);
1375113787
}

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1692,6 +1692,23 @@ getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
16921692
llvm_unreachable("invalid interlocking");
16931693
}
16941694

1695+
/// Emits the operand of a bit-counting builtin.
///
/// Scalar operands are returned as emitted. A fixed vector of i1 is bitcast
/// to an integer with one bit per element so the integer bit-counting
/// intrinsics can consume it.
static llvm::Value *EmitBitCountExpr(CodeGenFunction &CGF, const Expr *E) {
  llvm::Value *Operand = CGF.EmitScalarExpr(E);

  // Anything that is not a fixed vector of i1 is passed through untouched.
  auto *BoolVecTy = dyn_cast<llvm::FixedVectorType>(Operand->getType());
  if (!BoolVecTy || !BoolVecTy->getElementType()->isIntegerTy(1))
    return Operand;

  // A boolean vector can be cast directly to its bit-field representation.
  // The width is intentionally not rounded up to the next power of two; LLVM
  // is left to handle the trailing bits.
  llvm::Type *MaskTy = llvm::Type::getIntNTy(CGF.getLLVMContext(),
                                             BoolVecTy->getNumElements());
  return CGF.Builder.CreateBitCast(Operand, MaskTy);
}
1711+
16951712
/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
16961713
/// bits and a bit position and read and optionally modify the bit at that
16971714
/// position. The position index can be arbitrarily large, i.e. it can be larger
@@ -2019,7 +2036,7 @@ Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
20192036
assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) &&
20202037
"Unsupported builtin check kind");
20212038

2022-
Value *ArgValue = EmitScalarExpr(E);
2039+
Value *ArgValue = EmitBitCountExpr(*this, E);
20232040
if (!SanOpts.has(SanitizerKind::Builtin))
20242041
return ArgValue;
20252042

@@ -3333,7 +3350,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
33333350
E->getNumArgs() > 1;
33343351

33353352
Value *ArgValue =
3336-
HasFallback ? EmitScalarExpr(E->getArg(0))
3353+
HasFallback ? EmitBitCountExpr(*this, E->getArg(0))
33373354
: EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
33383355

33393356
llvm::Type *ArgType = ArgValue->getType();
@@ -3370,7 +3387,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
33703387
E->getNumArgs() > 1;
33713388

33723389
Value *ArgValue =
3373-
HasFallback ? EmitScalarExpr(E->getArg(0))
3390+
HasFallback ? EmitBitCountExpr(*this, E->getArg(0))
33743391
: EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
33753392

33763393
llvm::Type *ArgType = ArgValue->getType();
@@ -3455,7 +3472,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
34553472
case Builtin::BI__builtin_popcountl:
34563473
case Builtin::BI__builtin_popcountll:
34573474
case Builtin::BI__builtin_popcountg: {
3458-
Value *ArgValue = EmitScalarExpr(E->getArg(0));
3475+
Value *ArgValue = EmitBitCountExpr(*this, E->getArg(0));
34593476

34603477
llvm::Type *ArgType = ArgValue->getType();
34613478
Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

clang/lib/Sema/SemaChecking.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2214,7 +2214,7 @@ static bool BuiltinPopcountg(Sema &S, CallExpr *TheCall) {
22142214

22152215
QualType ArgTy = Arg->getType();
22162216

2217-
if (!ArgTy->isUnsignedIntegerType()) {
2217+
if (!ArgTy->isUnsignedIntegerType() && !ArgTy->isExtVectorBoolType()) {
22182218
S.Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
22192219
<< 1 << /* scalar */ 1 << /* unsigned integer ty */ 3 << /* no fp */ 0
22202220
<< ArgTy;
@@ -2239,7 +2239,7 @@ static bool BuiltinCountZeroBitsGeneric(Sema &S, CallExpr *TheCall) {
22392239

22402240
QualType Arg0Ty = Arg0->getType();
22412241

2242-
if (!Arg0Ty->isUnsignedIntegerType()) {
2242+
if (!Arg0Ty->isUnsignedIntegerType() && !Arg0Ty->isExtVectorBoolType()) {
22432243
S.Diag(Arg0->getBeginLoc(), diag::err_builtin_invalid_arg_type)
22442244
<< 1 << /* scalar */ 1 << /* unsigned integer ty */ 3 << /* no fp */ 0
22452245
<< Arg0Ty;

clang/test/AST/ByteCode/builtin-functions.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,7 @@ namespace SourceLocation {
454454
}
455455

456456
#define BITSIZE(x) (sizeof(x) * 8)
457+
constexpr bool __attribute__((ext_vector_type(4))) v4b{};
457458
namespace popcount {
458459
static_assert(__builtin_popcount(~0u) == __CHAR_BIT__ * sizeof(unsigned int), "");
459460
static_assert(__builtin_popcount(0) == 0, "");
@@ -471,6 +472,7 @@ namespace popcount {
471472
static_assert(__builtin_popcountg(0ul) == 0, "");
472473
static_assert(__builtin_popcountg(~0ull) == __CHAR_BIT__ * sizeof(unsigned long long), "");
473474
static_assert(__builtin_popcountg(0ull) == 0, "");
475+
static_assert(__builtin_popcountg(v4b) == 0, "");
474476
#ifdef __SIZEOF_INT128__
475477
static_assert(__builtin_popcountg(~(unsigned __int128)0) == __CHAR_BIT__ * sizeof(unsigned __int128), "");
476478
static_assert(__builtin_popcountg((unsigned __int128)0) == 0, "");
@@ -743,6 +745,7 @@ namespace clz {
743745
char clz62[__builtin_clzg((unsigned _BitInt(128))0xf) == BITSIZE(_BitInt(128)) - 4 ? 1 : -1];
744746
char clz63[__builtin_clzg((unsigned _BitInt(128))0xf, 42) == BITSIZE(_BitInt(128)) - 4 ? 1 : -1];
745747
#endif
748+
char clz64[__builtin_clzg(v4b, 0) == 0 ? 1 : -1];
746749
}
747750

748751
namespace ctz {
@@ -813,6 +816,7 @@ namespace ctz {
813816
char ctz62[__builtin_ctzg((unsigned _BitInt(128))1 << (BITSIZE(_BitInt(128)) - 1)) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1];
814817
char ctz63[__builtin_ctzg((unsigned _BitInt(128))1 << (BITSIZE(_BitInt(128)) - 1), 42) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1];
815818
#endif
819+
// An all-false boolean vector has no set bit, so the fallback (0) is returned.
char ctz64[__builtin_ctzg(v4b, 0) == 0 ? 1 : -1];
816820
}
817821

818822
namespace bswap {

0 commit comments

Comments
 (0)