Skip to content

Commit 9f1cc8c

Browse files
authored
Merge branch 'main' into x86-pshufd-splat
2 parents 6544717 + f4a39a8 commit 9f1cc8c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+645
-1491
lines changed

clang/docs/analyzer/developer-docs/DebugChecks.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,22 @@ The analyzer contains a number of checkers which can aid in debugging. Enable
99
them by using the "-analyzer-checker=" flag, followed by the name of the
1010
checker.
1111

12+
These checkers are especially useful when analyzing a specific function, using
13+
the `-analyze-function` flag. The flag accepts the function name for C code,
14+
like `-analyze-function=myfunction`.
15+
For C++ code, due to overloading, the function name must include the
16+
parameter list, like `-analyze-function="myfunction(int, _Bool)"`.
17+
18+
Note that `bool` must be spelled as `_Bool` in the parameter list.
19+
Refer to the output of `-analyzer-display-progress` to find the fully qualified
20+
function name.
21+
22+
There are cases when this name can still collide, for example with template
23+
function instances with non-deducible (a.k.a. explicit) template parameters.
24+
In such cases, passing a USR instead of a function name can resolve this
25+
ambiguity, like this: `-analyze-function="c:@S@Window@F@overloaded#I#"`.
26+
27+
Use the `clang-extdef-mapping` tool to find the USR for different functions.
1228

1329
General Analysis Dumpers
1430
========================

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -216,10 +216,13 @@ let Features = "sse2", Attributes = [NoThrow] in {
216216
def movnti : X86Builtin<"void(int *, int)">;
217217
}
218218

219-
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
220-
def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
219+
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
221220
def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
221+
def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
222222
def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
223+
}
224+
225+
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
223226
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
224227
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
225228
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
@@ -584,9 +587,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
584587
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
585588
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
586589
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
587-
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
588-
def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
589-
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
590590
def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
591591
def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
592592
def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
@@ -647,6 +647,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
647647
def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
648648
def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
649649
def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
650+
651+
def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
652+
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
653+
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
650654
}
651655

652656
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
@@ -1017,6 +1021,7 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
10171021
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
10181022
def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
10191023
def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
1024+
def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
10201025
}
10211026

10221027
let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -1990,13 +1995,13 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
19901995
}
19911996

19921997
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1993-
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
1994-
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
19951998
def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
19961999
}
19972000

19982001
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
19992002
def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
2003+
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
2004+
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
20002005
}
20012006

20022007
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
@@ -2026,8 +2031,7 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, Req
20262031
def psrlv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
20272032
}
20282033

2029-
let Features = "avx512f",
2030-
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
2034+
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
20312035
def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
20322036
def psrldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
20332037
def psrlqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
@@ -3266,7 +3270,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
32663270
}
32673271

32683272
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
3269-
def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
32703273
def expanddf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char)">;
32713274
def expanddi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char)">;
32723275
}

clang/include/clang/CrossTU/CrossTranslationUnit.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,8 @@ class CrossTranslationUnitContext {
180180
llvm::Expected<const VarDecl *> importDefinition(const VarDecl *VD,
181181
ASTUnit *Unit);
182182

183-
/// Get a name to identify a named decl.
184-
static std::optional<std::string> getLookupName(const NamedDecl *ND);
183+
/// Get a name to identify a decl.
184+
static std::optional<std::string> getLookupName(const Decl *D);
185185

186186
/// Emit diagnostics for the user for potential configuration errors.
187187
void emitCrossTUDiagnostics(const IndexError &IE);

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,9 +1258,8 @@ def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">,
12581258
HelpText<"Compression level for offload device binaries (HIP only)">;
12591259

12601260
def offload_jobs_EQ : Joined<["--"], "offload-jobs=">,
1261-
HelpText<"Specify the number of threads to use for device offloading tasks "
1262-
"during compilation. Can be a positive integer or the string "
1263-
"'jobserver' to use the make-style jobserver from the environment.">;
1261+
HelpText<"Specify the number of threads to use for device offloading tasks"
1262+
" during compilation.">;
12641263

12651264
defm offload_via_llvm : BoolFOption<"offload-via-llvm",
12661265
LangOpts<"OffloadViaLLVM">, DefaultFalse,

clang/include/clang/StaticAnalyzer/Core/PathSensitive/EntryPointStats.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class EntryPointStat {
2525
public:
2626
llvm::StringLiteral name() const { return Name; }
2727

28-
static void lockRegistry();
28+
static void lockRegistry(llvm::StringRef CPPFileName);
2929

3030
static void takeSnapshot(const Decl *EntryPoint);
3131
static void dumpStatsAsCSV(llvm::raw_ostream &OS);

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2773,6 +2773,50 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
27732773
return true;
27742774
}
27752775

2776+
/// Evaluates an unmasked x86 pshufd / pshuflw / pshufhw builtin call.
///
/// The shuffle is applied independently to every 128-bit lane of the vector.
/// With 32-bit elements all four elements of a lane are permuted. With 16-bit
/// elements only one group of four words per lane is permuted and the other
/// group is copied through unchanged.
///
/// \param S        Interpreter state; the immediate and the source vector are
///                 popped from its stack, the destination is only peeked.
/// \param OpPC     Not used by this function.
/// \param Call     The builtin call; it must have exactly two arguments.
/// \param IsShufHW For 16-bit elements: true permutes the upper four words of
///                 each lane, false permutes the lower four. It has no effect
///                 for 32-bit elements.
/// \returns false if the element size is neither 16 nor 32 bits, true once
///          every destination element has been written.
static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
                                       const CallExpr *Call, bool IsShufHW) {
  assert(Call->getNumArgs() == 2 && "masked forms handled via select*");

  // The immediate is popped first, then the source vector.
  const APSInt Imm = popToAPSInt(S, Call->getArg(1));
  const Pointer &Source = S.Stk.pop<Pointer>();
  const Pointer &Result = S.Stk.peek<Pointer>();

  const unsigned Count = Result.getNumElems();
  const PrimType EltTy = Result.getFieldDesc()->getPrimType();

  const unsigned EltWidth = static_cast<unsigned>(primSize(EltTy) * 8);
  const bool IsDWord = EltWidth == 32;
  if (!IsDWord && EltWidth != 16)
    return false;

  const unsigned PerLane = 128u / EltWidth;
  assert(PerLane && (Count % PerLane == 0));

  // Only the low eight bits of the immediate take part: four 2-bit selectors.
  const uint8_t Control = static_cast<uint8_t>(Imm.getZExtValue());

  // A word lane consists of two groups of four; the selectors address
  // elements relative to the start of the group that is being permuted.
  constexpr unsigned GroupSize = 4;
  const unsigned GroupBase = (!IsDWord && IsShufHW) ? GroupSize : 0;

  for (unsigned I = 0; I < Count; ++I) {
    const unsigned InLane = I % PerLane;

    // Dword positions are always permuted. A word position is permuted only
    // if it sits in the half named by IsShufHW; otherwise it maps to itself.
    unsigned From = I;
    if (IsDWord || ((InLane >= GroupSize) == IsShufHW)) {
      const unsigned Field = InLane % GroupSize;
      const unsigned Pick = (Control >> (2 * Field)) & 0x3;
      From = (I - InLane) + GroupBase + Pick;
    }

    INT_TYPE_SWITCH_NO_BOOL(EltTy, { Result.elem<T>(I) = Source.elem<T>(From); });
  }

  Result.initializeAllElements();
  return true;
}
2819+
27762820
static bool interp__builtin_elementwise_triop(
27772821
InterpState &S, CodePtr OpPC, const CallExpr *Call,
27782822
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3661,6 +3705,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
36613705
case X86::BI__builtin_ia32_selectpd_512:
36623706
return interp__builtin_select(S, OpPC, Call);
36633707

3708+
case X86::BI__builtin_ia32_pshuflw:
3709+
case X86::BI__builtin_ia32_pshuflw256:
3710+
case X86::BI__builtin_ia32_pshuflw512:
3711+
return interp__builtin_ia32_pshuf(S, OpPC, Call, false);
3712+
3713+
case X86::BI__builtin_ia32_pshufhw:
3714+
case X86::BI__builtin_ia32_pshufhw256:
3715+
case X86::BI__builtin_ia32_pshufhw512:
3716+
return interp__builtin_ia32_pshuf(S, OpPC, Call, true);
3717+
3718+
case X86::BI__builtin_ia32_pshufd:
3719+
case X86::BI__builtin_ia32_pshufd256:
3720+
case X86::BI__builtin_ia32_pshufd512:
3721+
return interp__builtin_ia32_pshuf(S, OpPC, Call, false);
3722+
36643723
case X86::BI__builtin_ia32_kandqi:
36653724
case X86::BI__builtin_ia32_kandhi:
36663725
case X86::BI__builtin_ia32_kandsi:

clang/lib/AST/ExprConstant.cpp

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11615,6 +11615,60 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
1161511615
return true;
1161611616
}
1161711617

11618+
/// Constant-folds an unmasked x86 pshufd / pshuflw / pshufhw builtin call.
///
/// The shuffle is applied independently to every 128-bit lane of the vector.
/// With 32-bit elements all four elements of a lane are permuted. With 16-bit
/// elements only one group of four words per lane is permuted and the other
/// group is copied through unchanged.
///
/// \param Info     Evaluation state used to evaluate both call arguments.
/// \param Call     The builtin call; argument 0 is the source vector and
///                 argument 1 is the shuffle control immediate.
/// \param IsShufHW For 16-bit elements: true permutes the upper four words of
///                 each lane, false permutes the lower four. It has no effect
///                 for 32-bit elements.
/// \param Out      Receives the shuffled vector on success.
/// \returns false if an argument cannot be evaluated, if the call's type is
///          not a vector, if the element width is neither 16 nor 32 bits, or
///          if the vector is not made of whole 128-bit lanes.
static bool evalPshufBuiltin(EvalInfo &Info, const CallExpr *Call,
                             bool IsShufHW, APValue &Out) {
  APValue Vec;
  APSInt Imm;
  if (!EvaluateAsRValue(Info, Call->getArg(0), Vec))
    return false;
  if (!EvaluateInteger(Call->getArg(1), Imm, Info))
    return false;

  const auto *VT = Call->getType()->getAs<VectorType>();
  if (!VT)
    return false;

  // Only dword and word shuffles are modelled. Rejecting every other width up
  // front keeps the division below well defined and prevents the word path
  // from computing a source index outside the lane for an unexpected element
  // size.
  const unsigned ElemBits = Info.Ctx.getTypeSize(VT->getElementType());
  if (ElemBits != 16 && ElemBits != 32)
    return false;

  const unsigned NumElts = VT->getNumElements();
  constexpr unsigned LaneBits = 128u;
  const unsigned LaneElts = LaneBits / ElemBits;
  if ((NumElts % LaneElts) != 0)
    return false;

  // Only the low eight bits of the immediate take part: four 2-bit selectors.
  const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());

  SmallVector<APValue, 32> ResultElements;
  ResultElements.reserve(NumElts);

  for (unsigned Idx = 0; Idx != NumElts; Idx++) {
    const unsigned LaneBase = (Idx / LaneElts) * LaneElts;
    const unsigned LaneIdx = Idx % LaneElts;
    // Positions that are not permuted map to themselves.
    unsigned SrcIdx = Idx;

    if (ElemBits == 32) {
      // Dword form: every position of the lane picks one of its four dwords.
      SrcIdx = LaneBase + ((Ctl >> (2 * LaneIdx)) & 0x3);
    } else {
      // Word form: a lane is two groups of four words and only the group
      // named by IsShufHW is permuted. Selectors are relative to that group.
      constexpr unsigned HalfSize = 4;
      const bool InHigh = LaneIdx >= HalfSize;
      if (!IsShufHW && !InHigh) {
        SrcIdx = LaneBase + ((Ctl >> (2 * LaneIdx)) & 0x3);
      } else if (IsShufHW && InHigh) {
        const unsigned Rel = LaneIdx - HalfSize;
        SrcIdx = LaneBase + HalfSize + ((Ctl >> (2 * Rel)) & 0x3);
      }
    }

    ResultElements.push_back(Vec.getVectorElt(SrcIdx));
  }

  Out = APValue(ResultElements.data(), ResultElements.size());
  return true;
}
11671+
1161811672
bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1161911673
if (!IsConstantEvaluatedBuiltinCall(E))
1162011674
return ExprEvaluatorBaseTy::VisitCallExpr(E);
@@ -11868,7 +11922,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1186811922

1186911923
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1187011924
}
11871-
1187211925
case clang::X86::BI__builtin_ia32_vprotbi:
1187311926
case clang::X86::BI__builtin_ia32_vprotdi:
1187411927
case clang::X86::BI__builtin_ia32_vprotqi:
@@ -12087,6 +12140,34 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1208712140

1208812141
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1208912142
}
12143+
12144+
case X86::BI__builtin_ia32_pshuflw:
12145+
case X86::BI__builtin_ia32_pshuflw256:
12146+
case X86::BI__builtin_ia32_pshuflw512: {
12147+
APValue R;
12148+
if (!evalPshufBuiltin(Info, E, false, R))
12149+
return false;
12150+
return Success(R, E);
12151+
}
12152+
12153+
case X86::BI__builtin_ia32_pshufhw:
12154+
case X86::BI__builtin_ia32_pshufhw256:
12155+
case X86::BI__builtin_ia32_pshufhw512: {
12156+
APValue R;
12157+
if (!evalPshufBuiltin(Info, E, true, R))
12158+
return false;
12159+
return Success(R, E);
12160+
}
12161+
12162+
case X86::BI__builtin_ia32_pshufd:
12163+
case X86::BI__builtin_ia32_pshufd256:
12164+
case X86::BI__builtin_ia32_pshufd512: {
12165+
APValue R;
12166+
if (!evalPshufBuiltin(Info, E, false, R))
12167+
return false;
12168+
return Success(R, E);
12169+
}
12170+
1209012171
case Builtin::BI__builtin_elementwise_clzg:
1209112172
case Builtin::BI__builtin_elementwise_ctzg: {
1209212173
APValue SourceLHS;

clang/lib/CrossTU/CrossTranslationUnit.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,9 +252,9 @@ CrossTranslationUnitContext::CrossTranslationUnitContext(CompilerInstance &CI)
252252
CrossTranslationUnitContext::~CrossTranslationUnitContext() {}
253253

254254
std::optional<std::string>
255-
CrossTranslationUnitContext::getLookupName(const NamedDecl *ND) {
255+
CrossTranslationUnitContext::getLookupName(const Decl *D) {
256256
SmallString<128> DeclUSR;
257-
bool Ret = index::generateUSRForDecl(ND, DeclUSR);
257+
bool Ret = index::generateUSRForDecl(D, DeclUSR);
258258
if (Ret)
259259
return {};
260260
return std::string(DeclUSR);

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9224,20 +9224,14 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
92249224
addOffloadCompressArgs(Args, CmdArgs);
92259225

92269226
if (Arg *A = Args.getLastArg(options::OPT_offload_jobs_EQ)) {
9227-
StringRef Val = A->getValue();
9228-
9229-
if (Val.equals_insensitive("jobserver"))
9230-
CmdArgs.push_back(Args.MakeArgString("--wrapper-jobs=jobserver"));
9231-
else {
9232-
int NumThreads;
9233-
if (Val.getAsInteger(10, NumThreads) || NumThreads <= 0) {
9234-
C.getDriver().Diag(diag::err_drv_invalid_int_value)
9235-
<< A->getAsString(Args) << Val;
9236-
} else {
9237-
CmdArgs.push_back(
9238-
Args.MakeArgString("--wrapper-jobs=" + Twine(NumThreads)));
9239-
}
9240-
}
9227+
int NumThreads;
9228+
if (StringRef(A->getValue()).getAsInteger(10, NumThreads) ||
9229+
NumThreads <= 0)
9230+
C.getDriver().Diag(diag::err_drv_invalid_int_value)
9231+
<< A->getAsString(Args) << A->getValue();
9232+
else
9233+
CmdArgs.push_back(
9234+
Args.MakeArgString("--wrapper-jobs=" + Twine(NumThreads)));
92419235
}
92429236

92439237
const char *Exec =

0 commit comments

Comments
 (0)