Skip to content

Commit 8b12838

Browse files
authored
[clang] Introduce elementwise ctlz/cttz builtins (#131995)
These builtins are modeled on the clzg/ctzg builtins, which accept an optional second argument. This second argument is returned if the first argument is 0. These builtins unconditionally exhibit zero-is-undef behaviour, regardless of target preference for the other ctz/clz builtins. The builtins have constexpr support. Fixes #154113
1 parent d770567 commit 8b12838

File tree

10 files changed

+398
-15
lines changed

10 files changed

+398
-15
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -760,8 +760,9 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
760760
The integer elementwise intrinsics, including ``__builtin_elementwise_popcount``,
761761
``__builtin_elementwise_bitreverse``, ``__builtin_elementwise_add_sat``,
762762
``__builtin_elementwise_sub_sat``, ``__builtin_elementwise_max``,
763-
``__builtin_elementwise_min``, and ``__builtin_elementwise_abs``
764-
can be called in a ``constexpr`` context.
763+
``__builtin_elementwise_min``, ``__builtin_elementwise_abs``,
764+
``__builtin_elementwise_ctlz``, and ``__builtin_elementwise_cttz`` can be
765+
called in a ``constexpr`` context.
765766

766767
No implicit promotion of integer types takes place. The mixing of integer types
767768
of different sizes and signs is forbidden in binary and ternary builtins.
@@ -870,6 +871,14 @@ T __builtin_elementwise_fshr(T x, T y, T z) perform a funnel shift right. Co
870871
significant bits of the wide value), the combined value is shifted
871872
right by z, and the least significant bits are extracted to produce
872873
a result that is the same size as the original arguments.
874+
T __builtin_elementwise_ctlz(T x[, T y])       return the number of leading 0 bits in the first argument. If          integer types
875+
the first argument is 0 and an optional second argument is provided,
876+
the second argument is returned. It is undefined behaviour if the
877+
first argument is 0 and no second argument is provided.
878+
T __builtin_elementwise_cttz(T x[, T y])       return the number of trailing 0 bits in the first argument. If         integer types
879+
the first argument is 0 and an optional second argument is provided,
880+
the second argument is returned. It is undefined behaviour if the
881+
first argument is 0 and no second argument is provided.
873882
============================================== ====================================================================== =========================================
874883

875884

clang/include/clang/Basic/Builtins.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1526,6 +1526,18 @@ def ElementwiseFshr : Builtin {
15261526
let Prototype = "void(...)";
15271527
}
15281528

1529+
def ElementwiseCtlz : Builtin {
1530+
let Spellings = ["__builtin_elementwise_ctlz"];
1531+
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
1532+
let Prototype = "void(...)";
1533+
}
1534+
1535+
def ElementwiseCttz : Builtin {
1536+
let Spellings = ["__builtin_elementwise_cttz"];
1537+
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
1538+
let Prototype = "void(...)";
1539+
}
1540+
15291541
def ReduceMax : Builtin {
15301542
let Spellings = ["__builtin_reduce_max"];
15311543
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];

clang/include/clang/Basic/DiagnosticASTKinds.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,9 @@ def note_constexpr_non_const_vectorelements : Note<
400400
"cannot determine number of elements for sizeless vectors in a constant expression">;
401401
def note_constexpr_assumption_failed : Note<
402402
"assumption evaluated to false">;
403+
def note_constexpr_countzeroes_zero : Note<
404+
"evaluation of %select{__builtin_elementwise_ctlz|__builtin_elementwise_cttz}0 "
405+
"with a zero value is undefined">;
403406
def err_experimental_clang_interp_failed : Error<
404407
"the experimental clang interpreter failed to evaluate an expression">;
405408

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1785,6 +1785,93 @@ static bool interp__builtin_elementwise_popcount(InterpState &S, CodePtr OpPC,
17851785
return true;
17861786
}
17871787

1788+
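/// Can be called with an integer or vector as the first parameter, plus an
/// optional second parameter of the same type whose value (per element, for
/// vectors) is used as the result wherever the first parameter is zero.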
1789+
static bool interp__builtin_elementwise_countzeroes(InterpState &S,
1790+
CodePtr OpPC,
1791+
const InterpFrame *Frame,
1792+
const CallExpr *Call,
1793+
unsigned BuiltinID) {
1794+
const bool HasZeroArg = Call->getNumArgs() == 2;
1795+
const bool IsCTTZ = BuiltinID == Builtin::BI__builtin_elementwise_cttz;
1796+
assert(Call->getNumArgs() == 1 || HasZeroArg);
1797+
if (Call->getArg(0)->getType()->isIntegerType()) {
1798+
PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType());
1799+
APSInt Val = popToAPSInt(S.Stk, ArgT);
1800+
std::optional<APSInt> ZeroVal;
1801+
if (HasZeroArg) {
1802+
ZeroVal = Val;
1803+
Val = popToAPSInt(S.Stk, ArgT);
1804+
}
1805+
1806+
if (Val.isZero()) {
1807+
if (ZeroVal) {
1808+
pushInteger(S, *ZeroVal, Call->getType());
1809+
return true;
1810+
}
1811+
// If we haven't been provided the second argument, the result is
1812+
// undefined
1813+
S.FFDiag(S.Current->getSource(OpPC),
1814+
diag::note_constexpr_countzeroes_zero)
1815+
<< /*IsTrailing=*/IsCTTZ;
1816+
return false;
1817+
}
1818+
1819+
if (BuiltinID == Builtin::BI__builtin_elementwise_ctlz) {
1820+
pushInteger(S, Val.countLeadingZeros(), Call->getType());
1821+
} else {
1822+
pushInteger(S, Val.countTrailingZeros(), Call->getType());
1823+
}
1824+
return true;
1825+
}
1826+
// Otherwise, the argument must be a vector.
1827+
const ASTContext &ASTCtx = S.getASTContext();
1828+
Pointer ZeroArg;
1829+
if (HasZeroArg) {
1830+
assert(Call->getArg(1)->getType()->isVectorType() &&
1831+
ASTCtx.hasSameUnqualifiedType(Call->getArg(0)->getType(),
1832+
Call->getArg(1)->getType()));
1833+
ZeroArg = S.Stk.pop<Pointer>();
1834+
assert(ZeroArg.getFieldDesc()->isPrimitiveArray());
1835+
}
1836+
assert(Call->getArg(0)->getType()->isVectorType());
1837+
const Pointer &Arg = S.Stk.pop<Pointer>();
1838+
assert(Arg.getFieldDesc()->isPrimitiveArray());
1839+
const Pointer &Dst = S.Stk.peek<Pointer>();
1840+
assert(Dst.getFieldDesc()->isPrimitiveArray());
1841+
assert(Arg.getFieldDesc()->getNumElems() ==
1842+
Dst.getFieldDesc()->getNumElems());
1843+
1844+
QualType ElemType = Arg.getFieldDesc()->getElemQualType();
1845+
PrimType ElemT = *S.getContext().classify(ElemType);
1846+
unsigned NumElems = Arg.getNumElems();
1847+
1848+
// FIXME: Reading from uninitialized vector elements?
1849+
for (unsigned I = 0; I != NumElems; ++I) {
1850+
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
1851+
APInt EltVal = Arg.atIndex(I).deref<T>().toAPSInt();
1852+
if (EltVal.isZero()) {
1853+
if (HasZeroArg) {
1854+
Dst.atIndex(I).deref<T>() = ZeroArg.atIndex(I).deref<T>();
1855+
} else {
1856+
// If we haven't been provided the second argument, the result is
1857+
// undefined
1858+
S.FFDiag(S.Current->getSource(OpPC),
1859+
diag::note_constexpr_countzeroes_zero)
1860+
<< /*IsTrailing=*/IsCTTZ;
1861+
return false;
1862+
}
1863+
} else if (IsCTTZ) {
1864+
Dst.atIndex(I).deref<T>() = T::from(EltVal.countTrailingZeros());
1865+
} else {
1866+
Dst.atIndex(I).deref<T>() = T::from(EltVal.countLeadingZeros());
1867+
}
1868+
Dst.atIndex(I).initialize();
1869+
});
1870+
}
1871+
1872+
return true;
1873+
}
1874+
17881875
static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC,
17891876
const InterpFrame *Frame,
17901877
const CallExpr *Call, unsigned ID) {
@@ -2903,6 +2990,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
29032990
case Builtin::BI__builtin_ctzg:
29042991
return interp__builtin_ctz(S, OpPC, Frame, Call, BuiltinID);
29052992

2993+
case Builtin::BI__builtin_elementwise_ctlz:
2994+
case Builtin::BI__builtin_elementwise_cttz:
2995+
return interp__builtin_elementwise_countzeroes(S, OpPC, Frame, Call,
2996+
BuiltinID);
2997+
29062998
case Builtin::BI__builtin_bswap16:
29072999
case Builtin::BI__builtin_bswap32:
29083000
case Builtin::BI__builtin_bswap64:

clang/lib/AST/ExprConstant.cpp

Lines changed: 65 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11827,6 +11827,53 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1182711827

1182811828
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1182911829
}
11830+
case Builtin::BI__builtin_elementwise_ctlz:
11831+
case Builtin::BI__builtin_elementwise_cttz: {
11832+
APValue SourceLHS;
11833+
std::optional<APValue> Fallback;
11834+
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS))
11835+
return false;
11836+
if (E->getNumArgs() > 1) {
11837+
APValue FallbackTmp;
11838+
if (!EvaluateAsRValue(Info, E->getArg(1), FallbackTmp))
11839+
return false;
11840+
Fallback = FallbackTmp;
11841+
}
11842+
11843+
QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
11844+
unsigned SourceLen = SourceLHS.getVectorLength();
11845+
SmallVector<APValue, 4> ResultElements;
11846+
ResultElements.reserve(SourceLen);
11847+
11848+
for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
11849+
APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt();
11850+
if (!LHS) {
11851+
// Without a fallback, a zero element is undefined
11852+
if (!Fallback) {
11853+
Info.FFDiag(E, diag::note_constexpr_countzeroes_zero)
11854+
<< /*IsTrailing=*/(E->getBuiltinCallee() ==
11855+
Builtin::BI__builtin_elementwise_cttz);
11856+
return false;
11857+
}
11858+
ResultElements.push_back(Fallback->getVectorElt(EltNum));
11859+
continue;
11860+
}
11861+
switch (E->getBuiltinCallee()) {
11862+
case Builtin::BI__builtin_elementwise_ctlz:
11863+
ResultElements.push_back(APValue(
11864+
APSInt(APInt(Info.Ctx.getIntWidth(DestEltTy), LHS.countl_zero()),
11865+
DestEltTy->isUnsignedIntegerOrEnumerationType())));
11866+
break;
11867+
case Builtin::BI__builtin_elementwise_cttz:
11868+
ResultElements.push_back(APValue(
11869+
APSInt(APInt(Info.Ctx.getIntWidth(DestEltTy), LHS.countr_zero()),
11870+
DestEltTy->isUnsignedIntegerOrEnumerationType())));
11871+
break;
11872+
}
11873+
}
11874+
11875+
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
11876+
}
1183011877
}
1183111878
}
1183211879

@@ -13382,6 +13429,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1338213429
case Builtin::BI__builtin_clzll:
1338313430
case Builtin::BI__builtin_clzs:
1338413431
case Builtin::BI__builtin_clzg:
13432+
case Builtin::BI__builtin_elementwise_ctlz:
1338513433
case Builtin::BI__lzcnt16: // Microsoft variants of count leading-zeroes
1338613434
case Builtin::BI__lzcnt:
1338713435
case Builtin::BI__lzcnt64: {
@@ -13390,7 +13438,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1339013438
return false;
1339113439

1339213440
std::optional<APSInt> Fallback;
13393-
if (BuiltinOp == Builtin::BI__builtin_clzg && E->getNumArgs() > 1) {
13441+
if ((BuiltinOp == Builtin::BI__builtin_clzg ||
13442+
BuiltinOp == Builtin::BI__builtin_elementwise_ctlz) &&
13443+
E->getNumArgs() > 1) {
1339413444
APSInt FallbackTemp;
1339513445
if (!EvaluateInteger(E->getArg(1), FallbackTemp, Info))
1339613446
return false;
@@ -13408,6 +13458,11 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1340813458
BuiltinOp != Builtin::BI__lzcnt &&
1340913459
BuiltinOp != Builtin::BI__lzcnt64;
1341013460

13461+
if (BuiltinOp == Builtin::BI__builtin_elementwise_ctlz) {
13462+
Info.FFDiag(E, diag::note_constexpr_countzeroes_zero)
13463+
<< /*IsTrailing=*/false;
13464+
}
13465+
1341113466
if (ZeroIsUndefined)
1341213467
return Error(E);
1341313468
}
@@ -13462,13 +13517,16 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1346213517
case Builtin::BI__builtin_ctzl:
1346313518
case Builtin::BI__builtin_ctzll:
1346413519
case Builtin::BI__builtin_ctzs:
13465-
case Builtin::BI__builtin_ctzg: {
13520+
case Builtin::BI__builtin_ctzg:
13521+
case Builtin::BI__builtin_elementwise_cttz: {
1346613522
APSInt Val;
1346713523
if (!EvaluateInteger(E->getArg(0), Val, Info))
1346813524
return false;
1346913525

1347013526
std::optional<APSInt> Fallback;
13471-
if (BuiltinOp == Builtin::BI__builtin_ctzg && E->getNumArgs() > 1) {
13527+
if ((BuiltinOp == Builtin::BI__builtin_ctzg ||
13528+
BuiltinOp == Builtin::BI__builtin_elementwise_cttz) &&
13529+
E->getNumArgs() > 1) {
1347213530
APSInt FallbackTemp;
1347313531
if (!EvaluateInteger(E->getArg(1), FallbackTemp, Info))
1347413532
return false;
@@ -13479,6 +13537,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1347913537
if (Fallback)
1348013538
return Success(*Fallback, E);
1348113539

13540+
if (BuiltinOp == Builtin::BI__builtin_elementwise_cttz) {
13541+
Info.FFDiag(E, diag::note_constexpr_countzeroes_zero)
13542+
<< /*IsTrailing=*/true;
13543+
}
1348213544
return Error(E);
1348313545
}
1348413546

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3326,9 +3326,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
33263326
case Builtin::BI__builtin_ctz:
33273327
case Builtin::BI__builtin_ctzl:
33283328
case Builtin::BI__builtin_ctzll:
3329-
case Builtin::BI__builtin_ctzg: {
3330-
bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3331-
E->getNumArgs() > 1;
3329+
case Builtin::BI__builtin_ctzg:
3330+
case Builtin::BI__builtin_elementwise_cttz: {
3331+
bool HasFallback =
3332+
(BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg ||
3333+
BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_cttz) &&
3334+
E->getNumArgs() > 1;
33323335

33333336
Value *ArgValue =
33343337
HasFallback ? EmitScalarExpr(E->getArg(0))
@@ -3338,8 +3341,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
33383341
Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
33393342

33403343
llvm::Type *ResultType = ConvertType(E->getType());
3341-
Value *ZeroUndef =
3342-
Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3344+
// The elementwise builtins always exhibit zero-is-undef behaviour
3345+
Value *ZeroUndef = Builder.getInt1(
3346+
HasFallback || getTarget().isCLZForZeroUndef() ||
3347+
BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_cttz);
33433348
Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
33443349
if (Result->getType() != ResultType)
33453350
Result =
@@ -3358,9 +3363,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
33583363
case Builtin::BI__builtin_clz:
33593364
case Builtin::BI__builtin_clzl:
33603365
case Builtin::BI__builtin_clzll:
3361-
case Builtin::BI__builtin_clzg: {
3362-
bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3363-
E->getNumArgs() > 1;
3366+
case Builtin::BI__builtin_clzg:
3367+
case Builtin::BI__builtin_elementwise_ctlz: {
3368+
bool HasFallback =
3369+
(BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg ||
3370+
BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_ctlz) &&
3371+
E->getNumArgs() > 1;
33643372

33653373
Value *ArgValue =
33663374
HasFallback ? EmitScalarExpr(E->getArg(0))
@@ -3370,8 +3378,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
33703378
Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
33713379

33723380
llvm::Type *ResultType = ConvertType(E->getType());
3373-
Value *ZeroUndef =
3374-
Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3381+
// The elementwise builtins always exhibit zero-is-undef behaviour
3382+
Value *ZeroUndef = Builder.getInt1(
3383+
HasFallback || getTarget().isCLZForZeroUndef() ||
3384+
BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_ctlz);
33753385
Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
33763386
if (Result->getType() != ResultType)
33773387
Result =

clang/lib/Sema/SemaChecking.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3080,6 +3080,19 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
30803080
TheCall->setType(Magnitude.get()->getType());
30813081
break;
30823082
}
3083+
case Builtin::BI__builtin_elementwise_ctlz:
3084+
case Builtin::BI__builtin_elementwise_cttz:
3085+
// These builtins can be unary or binary. Note for empty calls we call the
3086+
// unary checker in order to not emit an error that says the function
3087+
// expects 2 arguments, which would be misleading.
3088+
if (TheCall->getNumArgs() <= 1) {
3089+
if (PrepareBuiltinElementwiseMathOneArgCall(
3090+
TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy))
3091+
return ExprError();
3092+
} else if (BuiltinElementwiseMath(
3093+
TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy))
3094+
return ExprError();
3095+
break;
30833096
case Builtin::BI__builtin_reduce_max:
30843097
case Builtin::BI__builtin_reduce_min: {
30853098
if (PrepareBuiltinReduceMathOneArgCall(TheCall))

0 commit comments

Comments
 (0)