Skip to content

Commit 4982567

Browse files
committed
merge main into amd-staging
2 parents f1e9f96 + fb925b5 commit 4982567

File tree

225 files changed

+13969
-5067
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

225 files changed

+13969
-5067
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -280,8 +280,6 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i
280280
def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
281281
def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
282282
def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
283-
def pslldqi128_byteshift : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">;
284-
def psrldqi128_byteshift : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">;
285283
}
286284

287285
let Features = "sse2",
@@ -300,6 +298,9 @@ let Features = "sse2",
300298

301299
def psrawi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
302300
def psradi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
301+
302+
def pslldqi128_byteshift : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">;
303+
def psrldqi128_byteshift : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">;
303304
}
304305

305306
let Features = "sse3", Attributes = [NoThrow] in {
@@ -607,12 +608,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
607608
: X86Builtin<
608609
"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
609610
def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
610-
def pslldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">;
611611
def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
612612
def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
613613
def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
614614
def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
615-
def psrldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">;
616615
def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
617616
def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
618617
def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
@@ -646,10 +645,12 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
646645
def psllwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
647646
def pslldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
648647
def psllqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
648+
def pslldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">;
649649

650650
def psrlwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
651651
def psrldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
652652
def psrlqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
653+
def psrldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">;
653654

654655
def psrawi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
655656
def psradi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
@@ -2090,6 +2091,9 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512
20902091
: X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
20912092
def psrlw512
20922093
: X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
2094+
}
2095+
2096+
// 512-bit byte-shift builtins. They live in their own `let` scope, whose
// attribute list includes Constexpr.
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
20932097
def pslldqi512_byteshift : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Constant int)">;
20942098
def psrldqi512_byteshift : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Constant int)">;
20952099
}

clang/lib/AST/ByteCode/Compiler.cpp

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3273,34 +3273,43 @@ bool Compiler<Emitter>::VisitCXXConstructExpr(const CXXConstructExpr *E) {
32733273
}
32743274

32753275
if (T->isArrayType()) {
3276-
const ConstantArrayType *CAT =
3277-
Ctx.getASTContext().getAsConstantArrayType(E->getType());
3278-
if (!CAT)
3279-
return false;
3280-
3281-
size_t NumElems = CAT->getZExtSize();
32823276
const Function *Func = getFunction(E->getConstructor());
32833277
if (!Func)
32843278
return false;
32853279

3286-
// FIXME(perf): We're calling the constructor once per array element here,
3287-
// in the old intepreter we had a special-case for trivial constructors.
3288-
for (size_t I = 0; I != NumElems; ++I) {
3289-
if (!this->emitConstUint64(I, E))
3290-
return false;
3291-
if (!this->emitArrayElemPtrUint64(E))
3292-
return false;
3280+
if (!this->emitDupPtr(E))
3281+
return false;
32933282

3294-
// Constructor arguments.
3295-
for (const auto *Arg : E->arguments()) {
3296-
if (!this->visit(Arg))
3297-
return false;
3283+
std::function<bool(QualType)> initArrayDimension;
3284+
initArrayDimension = [&](QualType T) -> bool {
3285+
if (!T->isArrayType()) {
3286+
// Constructor arguments.
3287+
for (const auto *Arg : E->arguments()) {
3288+
if (!this->visit(Arg))
3289+
return false;
3290+
}
3291+
3292+
return this->emitCall(Func, 0, E);
32983293
}
32993294

3300-
if (!this->emitCall(Func, 0, E))
3295+
const ConstantArrayType *CAT =
3296+
Ctx.getASTContext().getAsConstantArrayType(T);
3297+
if (!CAT)
33013298
return false;
3302-
}
3303-
return true;
3299+
QualType ElemTy = CAT->getElementType();
3300+
unsigned NumElems = CAT->getZExtSize();
3301+
for (size_t I = 0; I != NumElems; ++I) {
3302+
if (!this->emitConstUint64(I, E))
3303+
return false;
3304+
if (!this->emitArrayElemPtrUint64(E))
3305+
return false;
3306+
if (!initArrayDimension(ElemTy))
3307+
return false;
3308+
}
3309+
return this->emitPopPtr(E);
3310+
};
3311+
3312+
return initArrayDimension(E->getType());
33043313
}
33053314

33063315
return false;
@@ -3599,8 +3608,6 @@ bool Compiler<Emitter>::VisitCXXNewExpr(const CXXNewExpr *E) {
35993608
if (PlacementDest) {
36003609
if (!this->visit(PlacementDest))
36013610
return false;
3602-
if (!this->emitStartLifetime(E))
3603-
return false;
36043611
if (!this->emitGetLocal(SizeT, ArrayLen, E))
36053612
return false;
36063613
if (!this->emitCheckNewTypeMismatchArray(SizeT, E, E))
@@ -3740,10 +3747,9 @@ bool Compiler<Emitter>::VisitCXXNewExpr(const CXXNewExpr *E) {
37403747
if (PlacementDest) {
37413748
if (!this->visit(PlacementDest))
37423749
return false;
3743-
if (!this->emitStartLifetime(E))
3744-
return false;
37453750
if (!this->emitCheckNewTypeMismatch(E, E))
37463751
return false;
3752+
37473753
} else {
37483754
// Allocate just one element.
37493755
if (!this->emitAlloc(Desc, E))

clang/lib/AST/ByteCode/Interp.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1903,12 +1903,19 @@ bool CheckNewTypeMismatch(InterpState &S, CodePtr OpPC, const Expr *E,
19031903
if (Ptr.inUnion() && Ptr.getBase().getRecord()->isUnion())
19041904
Ptr.activate();
19051905

1906+
if (Ptr.isZero()) {
1907+
S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_access_null)
1908+
<< AK_Construct;
1909+
return false;
1910+
}
1911+
19061912
if (!Ptr.isBlockPointer())
19071913
return false;
19081914

1915+
startLifetimeRecurse(Ptr);
1916+
19091917
// Similar to CheckStore(), but with the additional CheckTemporary() call and
19101918
// the AccessKinds are different.
1911-
19121919
if (!Ptr.block()->isAccessible()) {
19131920
if (!CheckExtern(S, OpPC, Ptr))
19141921
return false;

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3320,6 +3320,38 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
33203320
return true;
33213321
}
33223322

3323+
/// Shared driver for the x86 byte-shift builtins dispatched from
/// InterpretBuiltin().
///
/// Pops the immediate (argument 1) and the source vector, then writes every
/// element of the destination vector with the value \p Fn returns for it.
///
/// \param Call  The builtin call; asserted to have exactly two arguments.
/// \param ID    Builtin ID; not read anywhere in this function.
/// \param Fn    Callback computing one result element from the source
///              pointer, the index of the first element of the current
///              16-element group (Lane), the offset inside that group (I)
///              and the masked shift amount.
/// \returns false if the source is not a primitive array, true otherwise.
static bool interp__builtin_x86_byteshift(
3324+
InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID,
3325+
llvm::function_ref<APInt(const Pointer &, unsigned Lane, unsigned I,
3326+
unsigned Shift)>
3327+
Fn) {
3328+
assert(Call->getNumArgs() == 2);
3329+
3330+
APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
3331+
// Only the low 8 bits of the immediate are used as the shift amount.
uint64_t Shift = ImmAPS.getZExtValue() & 0xff;
3332+
3333+
const Pointer &Src = S.Stk.pop<Pointer>();
3334+
if (!Src.getFieldDesc()->isPrimitiveArray())
3335+
return false;
3336+
3337+
unsigned NumElems = Src.getNumElems();
3338+
// The destination is peeked rather than popped.
const Pointer &Dst = S.Stk.peek<Pointer>();
3339+
PrimType ElemT = Src.getFieldDesc()->getPrimType();
3340+
3341+
// Walk the vector in groups of 16 elements. The loop condition is an
// inequality, so this relies on NumElems being a multiple of 16;
// presumably guaranteed by the 16/32/64-element builtin signatures (confirm).
for (unsigned Lane = 0; Lane != NumElems; Lane += 16) {
3342+
for (unsigned I = 0; I != 16; ++I) {
3343+
unsigned Base = Lane + I;
3344+
APSInt Result = APSInt(Fn(Src, Lane, I, Shift));
3345+
// Store the callback's result converted to the vector's element type.
INT_TYPE_SWITCH_NO_BOOL(ElemT,
3346+
{ Dst.elem<T>(Base) = static_cast<T>(Result); });
3347+
}
3348+
}
3349+
3350+
Dst.initializeAllElements();
3351+
3352+
return true;
3353+
}
3354+
33233355
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
33243356
uint32_t BuiltinID) {
33253357
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -4390,6 +4422,39 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
43904422
case X86::BI__builtin_ia32_vec_set_v4di:
43914423
return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);
43924424

4425+
case X86::BI__builtin_ia32_pslldqi128_byteshift:
4426+
case X86::BI__builtin_ia32_pslldqi256_byteshift:
4427+
case X86::BI__builtin_ia32_pslldqi512_byteshift:
4428+
// These SLLDQ intrinsics always operate on byte elements (8 bits).
4429+
// The lane width is hardcoded to 16 bytes (one 128-bit lane of the vector),
4430+
// but the algorithm processes one byte per iteration,
4431+
// so APInt(8, ...) is correct and intentional.
4432+
return interp__builtin_x86_byteshift(
4433+
S, OpPC, Call, BuiltinID,
4434+
[](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) {
4435+
if (I < Shift) {
4436+
return APInt(8, 0);
4437+
}
4438+
return APInt(8, Src.elem<uint8_t>(Lane + I - Shift));
4439+
});
4440+
4441+
case X86::BI__builtin_ia32_psrldqi128_byteshift:
4442+
case X86::BI__builtin_ia32_psrldqi256_byteshift:
4443+
case X86::BI__builtin_ia32_psrldqi512_byteshift:
4444+
// These SRLDQ intrinsics always operate on byte elements (8 bits).
4445+
// The lane width is hardcoded to 16 bytes (one 128-bit lane of the vector),
4446+
// but the algorithm processes one byte per iteration,
4447+
// so APInt(8, ...) is correct and intentional.
4448+
return interp__builtin_x86_byteshift(
4449+
S, OpPC, Call, BuiltinID,
4450+
[](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) {
4451+
if (I + Shift < 16) {
4452+
return APInt(8, Src.elem<uint8_t>(Lane + I + Shift));
4453+
}
4454+
4455+
return APInt(8, 0);
4456+
});
4457+
43934458
default:
43944459
S.FFDiag(S.Current->getLocation(OpPC),
43954460
diag::note_invalid_subexpr_in_const_expr)

clang/lib/AST/ByteCode/Opcodes.td

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -866,19 +866,13 @@ def Free : Opcode {
866866
let Args = [ArgBool, ArgBool];
867867
}
868868

869-
def CheckNewTypeMismatch : Opcode {
870-
let Args = [ArgExpr];
871-
}
872-
873-
def InvalidNewDeleteExpr : Opcode {
874-
let Args = [ArgExpr];
875-
}
876-
869+
// Takes a single expression argument.
// NOTE(review): presumably the opcode behind emitCheckNewTypeMismatch() in the
// placement-new path of Compiler<Emitter>::VisitCXXNewExpr; confirm.
def CheckNewTypeMismatch : Opcode { let Args = [ArgExpr]; }
877870
// Variant typed over IntegerTypeClass (HasGroup = 1); also takes a single
// expression argument.
// NOTE(review): presumably the opcode behind emitCheckNewTypeMismatchArray(),
// which the compiler calls with SizeT for array placement new; confirm.
def CheckNewTypeMismatchArray : Opcode {
878871
let Types = [IntegerTypeClass];
879872
let Args = [ArgExpr];
880873
let HasGroup = 1;
881874
}
875+
// Takes a single expression argument.
def InvalidNewDeleteExpr : Opcode { let Args = [ArgExpr]; }
882876

883877
def IsConstantContext: Opcode;
884878
def CheckAllocations : Opcode;

clang/lib/AST/ExprConstant.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12906,6 +12906,66 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1290612906

1290712907
return Success(APValue(Elems.data(), NumElems), E);
1290812908
}
12909+
12910+
case X86::BI__builtin_ia32_pslldqi128_byteshift:
12911+
case X86::BI__builtin_ia32_pslldqi256_byteshift:
12912+
case X86::BI__builtin_ia32_pslldqi512_byteshift: {
12913+
assert(E->getNumArgs() == 2);
12914+
12915+
APValue Src;
12916+
APSInt Imm;
12917+
if (!EvaluateAsRValue(Info, E->getArg(0), Src) ||
12918+
!EvaluateInteger(E->getArg(1), Imm, Info))
12919+
return false;
12920+
12921+
unsigned VecLen = Src.getVectorLength();
12922+
unsigned Shift = Imm.getZExtValue() & 0xff;
12923+
12924+
SmallVector<APValue> ResultElements;
12925+
for (unsigned Lane = 0; Lane != VecLen; Lane += 16) {
12926+
for (unsigned I = 0; I != 16; ++I) {
12927+
if (I < Shift) {
12928+
APSInt Zero(8, /*isUnsigned=*/true);
12929+
Zero = 0;
12930+
ResultElements.push_back(APValue(Zero));
12931+
} else {
12932+
ResultElements.push_back(Src.getVectorElt(Lane + I - Shift));
12933+
}
12934+
}
12935+
}
12936+
12937+
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
12938+
}
12939+
12940+
case X86::BI__builtin_ia32_psrldqi128_byteshift:
12941+
case X86::BI__builtin_ia32_psrldqi256_byteshift:
12942+
case X86::BI__builtin_ia32_psrldqi512_byteshift: {
12943+
assert(E->getNumArgs() == 2);
12944+
12945+
APValue Src;
12946+
APSInt Imm;
12947+
if (!EvaluateAsRValue(Info, E->getArg(0), Src) ||
12948+
!EvaluateInteger(E->getArg(1), Imm, Info))
12949+
return false;
12950+
12951+
unsigned VecLen = Src.getVectorLength();
12952+
unsigned Shift = Imm.getZExtValue() & 0xff;
12953+
12954+
SmallVector<APValue> ResultElements;
12955+
for (unsigned Lane = 0; Lane != VecLen; Lane += 16) {
12956+
for (unsigned I = 0; I != 16; ++I) {
12957+
if (I + Shift < 16) {
12958+
ResultElements.push_back(Src.getVectorElt(Lane + I + Shift));
12959+
} else {
12960+
APSInt Zero(8, /*isUnsigned=*/true);
12961+
Zero = 0;
12962+
ResultElements.push_back(APValue(Zero));
12963+
}
12964+
}
12965+
}
12966+
12967+
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
12968+
}
1290912969
}
1291012970
}
1291112971

clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -922,9 +922,9 @@ class ConstExprEmitter
922922
}
923923

924924
mlir::Attribute VisitCastExpr(CastExpr *e, QualType destType) {
925-
if (isa<ExplicitCastExpr>(e))
926-
cgm.errorNYI(e->getBeginLoc(),
927-
"ConstExprEmitter::VisitCastExpr explicit cast");
925+
if (const auto *ece = dyn_cast<ExplicitCastExpr>(e))
926+
cgm.emitExplicitCastExprType(ece,
927+
const_cast<CIRGenFunction *>(emitter.cgf));
928928

929929
Expr *subExpr = e->getSubExpr();
930930

0 commit comments

Comments
 (0)