Skip to content

Commit 4ccbbbc

Browse files
committed
[X86] Allow PSHUFD/PSHUFLW/PSHUFW intrinsics in constexpr
1 parent ba1f202 commit 4ccbbbc

File tree

2 files changed

+70
-71
lines changed

2 files changed

+70
-71
lines changed

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2867,58 +2867,58 @@ enum class Half { None, Low, High };
28672867
static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC, const CallExpr *Call,
28682868
Half whichHalf) {
28692869
assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
2870-
APSInt controlImm = popToAPSInt(S, Call->getArg(1));
2871-
const Pointer &src = S.Stk.pop<Pointer>();
2870+
APSInt ControlImm = popToAPSInt(S, Call->getArg(1));
2871+
const Pointer &Src = S.Stk.pop<Pointer>();
28722872
const Pointer &Dst = S.Stk.peek<Pointer>();
28732873

2874-
const unsigned numElts = Dst.getNumElems();
2875-
const PrimType elemTy = Dst.getFieldDesc()->getPrimType();
2874+
unsigned NumElems = Dst.getNumElems();
2875+
PrimType ElemT = Dst.getFieldDesc()->getPrimType();
28762876

28772877
// Only i16/i32 supported
2878-
const unsigned elemBits = static_cast<unsigned>(primSize(elemTy) * 8);
2879-
if (elemBits != 16 && elemBits != 32) return false;
2878+
unsigned ElemBits = static_cast<unsigned>(primSize(ElemT) * 8);
2879+
if (ElemBits != 16 && ElemBits != 32) return false;
28802880

28812881
// Lane: 64b for MMX, 128b otherwise
2882-
const unsigned totalBits = numElts * elemBits;
2883-
const unsigned laneBits = (totalBits == 64) ? 64u : 128u;
2884-
const unsigned laneElts = laneBits / elemBits;
2885-
assert(laneElts && (numElts % laneElts == 0));
2882+
unsigned TotalBits = NumElems * ElemBits;
2883+
unsigned LaneBits = (TotalBits == 64) ? 64u : 128u;
2884+
unsigned LaneElts = LaneBits / ElemBits;
2885+
assert(LaneElts && (NumElems % LaneElts == 0));
28862886

2887-
const uint8_t ctl = static_cast<uint8_t>(controlImm.getZExtValue());
2887+
uint8_t ctl = static_cast<uint8_t>(ControlImm.getZExtValue());
28882888

2889-
for (unsigned idx = 0; idx != numElts; idx++) {
2890-
const unsigned laneBase = (idx / laneElts) * laneElts;
2891-
const unsigned laneIdx = idx % laneElts;
2889+
for (unsigned idx = 0; idx != NumElems; idx++) {
2890+
unsigned LaneBase = (idx / LaneElts) * LaneElts;
2891+
unsigned LaneIdx = idx % LaneElts;
28922892

2893-
unsigned srcIdx = idx;
2893+
unsigned SrcIdx = idx;
28942894

2895-
if (elemBits == 32) {
2895+
if (ElemBits == 32) {
28962896
// PSHUFD: 4×i32 per lane
2897-
const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
2898-
srcIdx = laneBase + sel;
2897+
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
2898+
SrcIdx = LaneBase + sel;
28992899
} else { // 16-bit shuffles
2900-
if (laneElts == 4) {
2900+
if (LaneElts == 4) {
29012901
// MMX: permute all 4×i16
2902-
const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
2903-
srcIdx = laneBase + sel;
2902+
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
2903+
SrcIdx = LaneBase + sel;
29042904
} else {
29052905
// 128b lanes: shuffle 4×i16 half
2906-
constexpr unsigned halfSize = 4;
2907-
if (whichHalf == Half::Low && laneIdx < halfSize) {
2908-
const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
2909-
srcIdx = laneBase + sel;
2910-
} else if (whichHalf == Half::High && laneIdx >= halfSize) {
2911-
const unsigned rel = laneIdx - halfSize;
2912-
const unsigned sel = (ctl >> (2 * rel)) & 0x3;
2913-
srcIdx = laneBase + halfSize + sel;
2906+
constexpr unsigned HalfSize = 4;
2907+
if (whichHalf == Half::Low && LaneIdx < HalfSize) {
2908+
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
2909+
SrcIdx = LaneBase + sel;
2910+
} else if (whichHalf == Half::High && LaneIdx >= HalfSize) {
2911+
unsigned rel = LaneIdx - HalfSize;
2912+
unsigned sel = (ctl >> (2 * rel)) & 0x3;
2913+
SrcIdx = LaneBase + HalfSize + sel;
29142914
} else if (whichHalf == Half::None) {
2915-
const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
2916-
srcIdx = laneBase + sel;
2915+
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
2916+
SrcIdx = LaneBase + sel;
29172917
}
29182918
}
29192919
}
29202920

2921-
INT_TYPE_SWITCH_NO_BOOL(elemTy, { Dst.elem<T>(idx) = src.elem<T>(srcIdx); });
2921+
INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(idx) = Src.elem<T>(SrcIdx); });
29222922
}
29232923
Dst.initializeAllElements();
29242924
return true;

clang/lib/AST/ExprConstant.cpp

Lines changed: 38 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -11615,72 +11615,71 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
1161511615
return true;
1161611616
}
1161711617

11618-
static constexpr unsigned noHalf = ~0u;
1161911618

1162011619
static bool evalPshufBuiltin(EvalInfo &Info, const CallExpr *Call,
11621-
unsigned elemBits, unsigned halfBase,
11620+
unsigned ElemBits, unsigned HalfBase,
1162211621
APValue &Out) {
1162311622
// Expect (vec, imm8)
11624-
APValue vec;
11625-
APSInt imm;
11626-
if (!EvaluateAsRValue(Info, Call->getArg(0), vec)) return false;
11627-
if (!EvaluateInteger(Call->getArg(1), imm, Info)) return false;
11623+
APValue Vec;
11624+
APSInt Imm;
11625+
if (!EvaluateAsRValue(Info, Call->getArg(0), Vec)) return false;
11626+
if (!EvaluateInteger(Call->getArg(1), Imm, Info)) return false;
1162811627

11629-
const auto *vt = Call->getType()->getAs<VectorType>();
11630-
if (!vt) return false;
11631-
const unsigned nElts = vt->getNumElements();
11628+
const auto *VT = Call->getType()->getAs<VectorType>();
11629+
if (!VT) return false;
11630+
unsigned NumElts = VT->getNumElements();
1163211631

1163311632
// Lane geometry: MMX pshufw is a single 64-bit lane; others use 128-bit lanes.
11634-
const unsigned totalBits = nElts * elemBits;
11635-
const unsigned laneBits = (totalBits == 64) ? 64u : 128u;
11636-
const unsigned laneElts = laneBits / elemBits;
11637-
if (!laneElts || (nElts % laneElts) != 0) return false;
11633+
unsigned TotalBits = NumElts * ElemBits;
11634+
unsigned LaneBits = (TotalBits == 64) ? 64u : 128u;
11635+
unsigned LaneElts = LaneBits / ElemBits;
11636+
if (!LaneElts || (NumElts % LaneElts) != 0) return false;
1163811637

11639-
const uint8_t ctl = static_cast<uint8_t>(imm.getZExtValue());
11638+
uint8_t ctl = static_cast<uint8_t>(Imm.getZExtValue());
1164011639

1164111640
SmallVector<APValue, 32> ResultElements;
11642-
ResultElements.reserve(nElts);
11641+
ResultElements.reserve(NumElts);
1164311642

11644-
for (unsigned idx = 0; idx != nElts; idx++) {
11645-
const unsigned laneBase = (idx / laneElts) * laneElts;
11646-
const unsigned laneIdx = idx % laneElts;
11643+
for (unsigned idx = 0; idx != NumElts; idx++) {
11644+
unsigned LaneBase = (idx / LaneElts) * LaneElts;
11645+
unsigned LaneIdx = idx % LaneElts;
1164711646

11648-
unsigned srcIdx = idx;
11647+
unsigned SrcIdx = idx;
1164911648

11650-
if (elemBits == 32) {
11649+
if (ElemBits == 32) {
1165111650
// PSHUFD: permute 4×i32 per 128-bit lane
11652-
const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
11653-
srcIdx = laneBase + sel;
11651+
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
11652+
SrcIdx = LaneBase + sel;
1165411653
} else {
1165511654
// elemBits == 16 (PSHUFLW / PSHUFHW / PSHUFW)
11656-
if (laneElts == 4) {
11655+
if (LaneElts == 4) {
1165711656
// MMX PSHUFW: permute entire 64-bit lane (4×i16)
11658-
const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
11659-
srcIdx = laneBase + sel;
11657+
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
11658+
SrcIdx = LaneBase + sel;
1166011659
} else {
1166111660
// SSE/AVX/AVX-512: 128-bit lane has 8×i16. Permute a 4×i16 half.
11662-
constexpr unsigned halfSize = 4;
11663-
if (halfBase == 0) {
11661+
constexpr unsigned HalfSize = 4;
11662+
if (HalfBase == 0) {
1166411663
// PSHUFLW: permute low half (words 0..3)
11665-
if (laneIdx < halfSize) {
11666-
const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
11667-
srcIdx = laneBase + sel;
11664+
if (LaneIdx < HalfSize) {
11665+
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
11666+
SrcIdx = LaneBase + sel;
1166811667
}
11669-
} else if (halfBase == halfSize) {
11668+
} else if (HalfBase == HalfSize) {
1167011669
// PSHUFHW: permute high half (words 4..7)
11671-
if (laneIdx >= halfSize) {
11672-
const unsigned rel = laneIdx - halfSize;
11673-
const unsigned sel = (ctl >> (2 * rel)) & 0x3;
11674-
srcIdx = laneBase + halfBase + sel;
11670+
if (LaneIdx >= HalfSize) {
11671+
unsigned rel = LaneIdx - HalfSize;
11672+
unsigned sel = (ctl >> (2 * rel)) & 0x3;
11673+
SrcIdx = LaneBase + HalfBase + sel;
1167511674
}
1167611675
} else {
11677-
const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
11678-
srcIdx = laneBase + sel;
11676+
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
11677+
SrcIdx = LaneBase + sel;
1167911678
}
1168011679
}
1168111680
}
1168211681

11683-
ResultElements.push_back(vec.getVectorElt(srcIdx));
11682+
ResultElements.push_back(Vec.getVectorElt(SrcIdx));
1168411683
}
1168511684

1168611685
Out = APValue(ResultElements.data(), ResultElements.size());
@@ -12181,7 +12180,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1218112180
case X86::BI__builtin_ia32_pshufd256:
1218212181
case X86::BI__builtin_ia32_pshufd512: {
1218312182
APValue R;
12184-
if (!evalPshufBuiltin(Info, E, /*ElemBits=*/32, /*HalfBaseElems=*/noHalf, R))
12183+
if (!evalPshufBuiltin(Info, E, /*ElemBits=*/32, /*HalfBaseElems=*/~0u, R))
1218512184
return false;
1218612185
return Success(R, E);
1218712186
}

0 commit comments

Comments
 (0)