Skip to content

Commit 06d6a9d

Browse files
Merge branch 'main' into main
2 parents f894d7d + a4e7d15 commit 06d6a9d

File tree

247 files changed

+68101
-63992
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

247 files changed

+68101
-63992
lines changed

clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConv.h

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -456,17 +456,15 @@ class SMTConv {
456456
llvm::SMTExprRef OperandExp =
457457
getSymExpr(Solver, Ctx, USE->getOperand(), &OperandTy, hasComparison);
458458

459-
if (const BinarySymExpr *BSE =
460-
dyn_cast<BinarySymExpr>(USE->getOperand())) {
461-
if (USE->getOpcode() == UO_Minus &&
462-
BinaryOperator::isComparisonOp(BSE->getOpcode()))
463-
// The comparison operator yields a boolean value in the Z3
464-
// language and applying the unary minus operator on a boolean
465-
// crashes Z3. However, the unary minus does nothing in this
466-
// context (a number is truthy if and only if its negative is
467-
// truthy), so let's just ignore the unary minus.
468-
// TODO: Replace this with a more general solution.
469-
return OperandExp;
459+
// When the operand is a bool expr, but the operator is an integeral
460+
// operator, casting the bool expr to the integer before creating the
461+
// unary operator.
462+
// E.g. -(5 && a)
463+
if (OperandTy == Ctx.BoolTy && OperandTy != *RetTy &&
464+
(*RetTy)->isIntegerType()) {
465+
OperandExp = fromCast(Solver, OperandExp, (*RetTy),
466+
Ctx.getTypeSize(*RetTy), OperandTy, 1);
467+
OperandTy = (*RetTy);
470468
}
471469

472470
llvm::SMTExprRef UnaryExp =

clang/lib/CIR/CodeGen/CIRGenBuilder.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,35 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
574574
info.isSigned, isLvalueVolatile,
575575
addr.getAlignment().getAsAlign().value());
576576
}
577+
578+
cir::VecShuffleOp
579+
createVecShuffle(mlir::Location loc, mlir::Value vec1, mlir::Value vec2,
580+
llvm::ArrayRef<mlir::Attribute> maskAttrs) {
581+
auto vecType = mlir::cast<cir::VectorType>(vec1.getType());
582+
auto resultTy = cir::VectorType::get(getContext(), vecType.getElementType(),
583+
maskAttrs.size());
584+
return cir::VecShuffleOp::create(*this, loc, resultTy, vec1, vec2,
585+
getArrayAttr(maskAttrs));
586+
}
587+
588+
cir::VecShuffleOp createVecShuffle(mlir::Location loc, mlir::Value vec1,
589+
mlir::Value vec2,
590+
llvm::ArrayRef<int64_t> mask) {
591+
auto maskAttrs = llvm::to_vector_of<mlir::Attribute>(
592+
llvm::map_range(mask, [&](int32_t idx) {
593+
return cir::IntAttr::get(getSInt32Ty(), idx);
594+
}));
595+
return createVecShuffle(loc, vec1, vec2, maskAttrs);
596+
}
597+
598+
cir::VecShuffleOp createVecShuffle(mlir::Location loc, mlir::Value vec1,
599+
llvm::ArrayRef<int64_t> mask) {
600+
/// Create a unary shuffle. The second vector operand of the IR instruction
601+
/// is poison.
602+
cir::ConstantOp poison =
603+
getConstant(loc, cir::PoisonAttr::get(vec1.getType()));
604+
return createVecShuffle(loc, vec1, poison, mask);
605+
}
577606
};
578607

579608
} // namespace clang::CIRGen

clang/lib/CIR/CodeGen/CIRGenExpr.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -669,9 +669,18 @@ RValue CIRGenFunction::emitLoadOfExtVectorElementLValue(LValue lv) {
669669
return RValue::get(cir::VecExtractOp::create(builder, loc, vec, index));
670670
}
671671

672-
cgm.errorNYI(
673-
loc, "emitLoadOfExtVectorElementLValue: Result of expr is vector type");
674-
return {};
672+
// Always use shuffle vector to try to retain the original program structure
673+
SmallVector<int64_t> mask;
674+
for (auto i : llvm::seq<unsigned>(0, exprVecTy->getNumElements()))
675+
mask.push_back(getAccessedFieldNo(i, elts));
676+
677+
cir::VecShuffleOp resultVec = builder.createVecShuffle(loc, vec, mask);
678+
if (lv.getType()->isExtVectorBoolType()) {
679+
cgm.errorNYI(loc, "emitLoadOfExtVectorElementLValue: ExtVectorBoolType");
680+
return {};
681+
}
682+
683+
return RValue::get(resultVec);
675684
}
676685

677686
static cir::FuncOp emitFunctionDeclPointer(CIRGenModule &cgm, GlobalDecl gd) {

clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,8 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
203203
return emitNullValue(e->getType(), cgf.getLoc(e->getSourceRange()));
204204
}
205205

206+
mlir::Value VisitOffsetOfExpr(OffsetOfExpr *e);
207+
206208
mlir::Value VisitOpaqueValueExpr(OpaqueValueExpr *e) {
207209
if (e->isGLValue())
208210
return emitLoadOfLValue(cgf.getOrCreateOpaqueLValueMapping(e),
@@ -2209,6 +2211,21 @@ mlir::Value ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *e) {
22092211
return maybePromoteBoolResult(boolVal, cgf.convertType(e->getType()));
22102212
}
22112213

2214+
mlir::Value ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *e) {
2215+
// Try folding the offsetof to a constant.
2216+
Expr::EvalResult evalResult;
2217+
if (e->EvaluateAsInt(evalResult, cgf.getContext())) {
2218+
mlir::Type type = cgf.convertType(e->getType());
2219+
llvm::APSInt value = evalResult.Val.getInt();
2220+
return builder.getConstAPInt(cgf.getLoc(e->getExprLoc()), type, value);
2221+
}
2222+
2223+
cgf.getCIRGenModule().errorNYI(
2224+
e->getSourceRange(),
2225+
"ScalarExprEmitter::VisitOffsetOfExpr Can't eval expr as int");
2226+
return {};
2227+
}
2228+
22122229
mlir::Value ScalarExprEmitter::VisitUnaryReal(const UnaryOperator *e) {
22132230
QualType promotionTy = getPromotionType(e->getSubExpr()->getType());
22142231
mlir::Value result = VisitRealImag(e, promotionTy);

clang/lib/CodeGen/CGCUDANV.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,8 +1280,7 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() {
12801280
return nullptr;
12811281
}
12821282
if (CGM.getLangOpts().OffloadViaLLVM ||
1283-
(CGM.getLangOpts().OffloadingNewDriver &&
1284-
(CGM.getLangOpts().HIP || RelocatableDeviceCode)))
1283+
(CGM.getLangOpts().OffloadingNewDriver && RelocatableDeviceCode))
12851284
createOffloadingEntries();
12861285
else
12871286
return makeModuleCtorFunction();

clang/lib/Driver/Driver.cpp

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4413,10 +4413,6 @@ void Driver::BuildDefaultActions(Compilation &C, DerivedArgList &Args,
44134413
options::OPT_no_offload_new_driver,
44144414
C.isOffloadingHostKind(Action::OFK_Cuda));
44154415

4416-
bool HIPNoRDC =
4417-
C.isOffloadingHostKind(Action::OFK_HIP) &&
4418-
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
4419-
44204416
// Builder to be used to build offloading actions.
44214417
std::unique_ptr<OffloadingActionBuilder> OffloadBuilder =
44224418
!UseNewOffloadingDriver
@@ -4550,7 +4546,7 @@ void Driver::BuildDefaultActions(Compilation &C, DerivedArgList &Args,
45504546
// Check if this Linker Job should emit a static library.
45514547
if (ShouldEmitStaticLibrary(Args)) {
45524548
LA = C.MakeAction<StaticLibJobAction>(LinkerInputs, types::TY_Image);
4553-
} else if ((UseNewOffloadingDriver && !HIPNoRDC) ||
4549+
} else if (UseNewOffloadingDriver ||
45544550
Args.hasArg(options::OPT_offload_link)) {
45554551
LA = C.MakeAction<LinkerWrapperJobAction>(LinkerInputs, types::TY_Image);
45564552
LA->propagateHostOffloadInfo(C.getActiveOffloadKinds(),
@@ -4887,20 +4883,6 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
48874883
<< "-fhip-emit-relocatable"
48884884
<< "--offload-device-only";
48894885

4890-
// For HIP non-rdc non-device-only compilation, create a linker wrapper
4891-
// action for each host object to link, bundle and wrap device files in
4892-
// it.
4893-
if ((isa<AssembleJobAction>(HostAction) ||
4894-
(isa<BackendJobAction>(HostAction) &&
4895-
HostAction->getType() == types::TY_LTO_BC)) &&
4896-
HIPNoRDC && !offloadDeviceOnly()) {
4897-
ActionList AL{HostAction};
4898-
HostAction = C.MakeAction<LinkerWrapperJobAction>(AL, types::TY_Object);
4899-
HostAction->propagateHostOffloadInfo(C.getActiveOffloadKinds(),
4900-
/*BoundArch=*/nullptr);
4901-
return HostAction;
4902-
}
4903-
49044886
// Don't build offloading actions if we do not have a compile action. If
49054887
// preprocessing only ignore embedding.
49064888
if (!(isa<CompileJobAction>(HostAction) ||
@@ -5065,6 +5047,21 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
50655047
DDep.add(*FatbinAction,
50665048
*C.getOffloadToolChains<Action::OFK_HIP>().first->second, nullptr,
50675049
Action::OFK_HIP);
5050+
} else if (HIPNoRDC) {
5051+
// Package all the offloading actions into a single output that can be
5052+
// embedded in the host and linked.
5053+
Action *PackagerAction =
5054+
C.MakeAction<OffloadPackagerJobAction>(OffloadActions, types::TY_Image);
5055+
5056+
// For HIP non-RDC compilation, wrap the device binary with linker wrapper
5057+
// before bundling with host code. Do not bind a specific GPU arch here,
5058+
// as the packaged image may contain entries for multiple GPUs.
5059+
ActionList AL{PackagerAction};
5060+
PackagerAction =
5061+
C.MakeAction<LinkerWrapperJobAction>(AL, types::TY_HIP_FATBIN);
5062+
DDep.add(*PackagerAction,
5063+
*C.getOffloadToolChains<Action::OFK_HIP>().first->second,
5064+
/*BoundArch=*/nullptr, Action::OFK_HIP);
50685065
} else {
50695066
// Package all the offloading actions into a single output that can be
50705067
// embedded in the host and linked.
@@ -5194,6 +5191,14 @@ Action *Driver::ConstructPhaseAction(
51945191
return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_BC);
51955192
}
51965193
case phases::Backend: {
5194+
// Skip a redundant Backend phase for HIP device code when using the new
5195+
// offload driver, where mid-end is done in linker wrapper.
5196+
if (TargetDeviceOffloadKind == Action::OFK_HIP &&
5197+
Args.hasFlag(options::OPT_offload_new_driver,
5198+
options::OPT_no_offload_new_driver, false) &&
5199+
!offloadDeviceOnly())
5200+
return Input;
5201+
51975202
if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) {
51985203
types::ID Output;
51995204
if (Args.hasArg(options::OPT_ffat_lto_objects) &&
@@ -5213,7 +5218,8 @@ Action *Driver::ConstructPhaseAction(
52135218
if (Args.hasArg(options::OPT_emit_llvm) ||
52145219
TargetDeviceOffloadKind == Action::OFK_SYCL ||
52155220
(((Input->getOffloadingToolChain() &&
5216-
Input->getOffloadingToolChain()->getTriple().isAMDGPU()) ||
5221+
Input->getOffloadingToolChain()->getTriple().isAMDGPU() &&
5222+
TargetDeviceOffloadKind != Action::OFK_None) ||
52175223
TargetDeviceOffloadKind == Action::OFK_HIP) &&
52185224
((Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
52195225
false) ||

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7636,7 +7636,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
76367636
CmdArgs.push_back("-fcuda-include-gpubinary");
76377637
CmdArgs.push_back(CudaDeviceInput->getFilename());
76387638
} else if (!HostOffloadingInputs.empty()) {
7639-
if (IsCuda && !IsRDCMode) {
7639+
if ((IsCuda || IsHIP) && !IsRDCMode) {
76407640
assert(HostOffloadingInputs.size() == 1 && "Only one input expected");
76417641
CmdArgs.push_back("-fcuda-include-gpubinary");
76427642
CmdArgs.push_back(HostOffloadingInputs.front().getFilename());
@@ -9093,7 +9093,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
90939093
auto ShouldForward = [&](const llvm::DenseSet<unsigned> &Set, Arg *A,
90949094
const ToolChain &TC) {
90959095
// CMake hack to avoid printing verbose informatoin for HIP non-RDC mode.
9096-
if (A->getOption().matches(OPT_v) && JA.getType() == types::TY_Object)
9096+
if (A->getOption().matches(OPT_v) && JA.getType() == types::TY_HIP_FATBIN)
90979097
return false;
90989098
return (Set.contains(A->getOption().getID()) ||
90999099
(A->getOption().getGroup().isValid() &&
@@ -9175,7 +9175,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
91759175
// non-RDC mode compilation. This confuses default CMake implicit linker
91769176
// argument parsing when the language is set to HIP and the system linker is
91779177
// also `ld.lld`.
9178-
if (Args.hasArg(options::OPT_v) && JA.getType() != types::TY_Object)
9178+
if (Args.hasArg(options::OPT_v) && JA.getType() != types::TY_HIP_FATBIN)
91799179
CmdArgs.push_back("--wrapper-verbose");
91809180
if (Arg *A = Args.getLastArg(options::OPT_cuda_path_EQ))
91819181
CmdArgs.push_back(
@@ -9247,14 +9247,14 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
92479247

92489248
// We use action type to differentiate two use cases of the linker wrapper.
92499249
// TY_Image for normal linker wrapper work.
9250-
// TY_Object for HIP fno-gpu-rdc embedding device binary in a relocatable
9251-
// object.
9252-
assert(JA.getType() == types::TY_Object || JA.getType() == types::TY_Image);
9253-
if (JA.getType() == types::TY_Object) {
9250+
// TY_HIP_FATBIN for HIP fno-gpu-rdc emitting a fat binary without wrapping.
9251+
assert(JA.getType() == types::TY_HIP_FATBIN ||
9252+
JA.getType() == types::TY_Image);
9253+
if (JA.getType() == types::TY_HIP_FATBIN) {
9254+
CmdArgs.push_back("--emit-fatbin-only");
92549255
CmdArgs.append({"-o", Output.getFilename()});
92559256
for (auto Input : Inputs)
92569257
CmdArgs.push_back(Input.getFilename());
9257-
CmdArgs.push_back("-r");
92589258
} else
92599259
for (const char *LinkArg : LinkCommand->getArguments())
92609260
CmdArgs.push_back(LinkArg);

clang/lib/Headers/avx512fintrin.h

Lines changed: 14 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1450,26 +1450,19 @@ _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
14501450
(__v8df)_mm512_sqrt_round_pd((A), (R)), \
14511451
(__v8df)_mm512_setzero_pd()))
14521452

1453-
static __inline__ __m512d __DEFAULT_FN_ATTRS512
1454-
_mm512_sqrt_pd(__m512d __A)
1455-
{
1456-
return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1457-
_MM_FROUND_CUR_DIRECTION);
1453+
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A) {
1454+
return (__m512d)__builtin_elementwise_sqrt((__v8df)__A);
14581455
}
14591456

14601457
static __inline__ __m512d __DEFAULT_FN_ATTRS512
1461-
_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1462-
{
1463-
return (__m512d)__builtin_ia32_selectpd_512(__U,
1464-
(__v8df)_mm512_sqrt_pd(__A),
1458+
_mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A) {
1459+
return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A),
14651460
(__v8df)__W);
14661461
}
14671462

14681463
static __inline__ __m512d __DEFAULT_FN_ATTRS512
1469-
_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1470-
{
1471-
return (__m512d)__builtin_ia32_selectpd_512(__U,
1472-
(__v8df)_mm512_sqrt_pd(__A),
1464+
_mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A) {
1465+
return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A),
14731466
(__v8df)_mm512_setzero_pd());
14741467
}
14751468

@@ -1486,26 +1479,19 @@ _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
14861479
(__v16sf)_mm512_sqrt_round_ps((A), (R)), \
14871480
(__v16sf)_mm512_setzero_ps()))
14881481

1489-
static __inline__ __m512 __DEFAULT_FN_ATTRS512
1490-
_mm512_sqrt_ps(__m512 __A)
1491-
{
1492-
return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1493-
_MM_FROUND_CUR_DIRECTION);
1482+
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A) {
1483+
return (__m512)__builtin_elementwise_sqrt((__v16sf)__A);
14941484
}
14951485

1496-
static __inline__ __m512 __DEFAULT_FN_ATTRS512
1497-
_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1498-
{
1499-
return (__m512)__builtin_ia32_selectps_512(__U,
1500-
(__v16sf)_mm512_sqrt_ps(__A),
1486+
static __inline__ __m512 __DEFAULT_FN_ATTRS512
1487+
_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) {
1488+
return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A),
15011489
(__v16sf)__W);
15021490
}
15031491

1504-
static __inline__ __m512 __DEFAULT_FN_ATTRS512
1505-
_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1506-
{
1507-
return (__m512)__builtin_ia32_selectps_512(__U,
1508-
(__v16sf)_mm512_sqrt_ps(__A),
1492+
static __inline__ __m512 __DEFAULT_FN_ATTRS512
1493+
_mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A) {
1494+
return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A),
15091495
(__v16sf)_mm512_setzero_ps());
15101496
}
15111497

clang/lib/Headers/avx512fp16intrin.h

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,24 +1401,20 @@ _mm_maskz_scalef_sh(__mmask8 __U, __m128h __A, __m128h __B) {
14011401
(__v32hf)_mm512_setzero_ph()))
14021402

14031403
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sqrt_ph(__m512h __A) {
1404-
return (__m512h)__builtin_ia32_sqrtph512((__v32hf)__A,
1405-
_MM_FROUND_CUR_DIRECTION);
1404+
return (__m512h)__builtin_elementwise_sqrt((__v32hf)__A);
14061405
}
14071406

14081407
static __inline__ __m512h __DEFAULT_FN_ATTRS512
14091408
_mm512_mask_sqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) {
14101409
return (__m512h)__builtin_ia32_selectph_512(
1411-
(__mmask32)(__U),
1412-
(__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
1413-
(__v32hf)(__m512h)(__W));
1410+
(__mmask32)(__U), (__v32hf)_mm512_sqrt_ph(__A), (__v32hf)(__m512h)(__W));
14141411
}
14151412

14161413
static __inline__ __m512h __DEFAULT_FN_ATTRS512
14171414
_mm512_maskz_sqrt_ph(__mmask32 __U, __m512h __A) {
1418-
return (__m512h)__builtin_ia32_selectph_512(
1419-
(__mmask32)(__U),
1420-
(__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
1421-
(__v32hf)_mm512_setzero_ph());
1415+
return (__m512h)__builtin_ia32_selectph_512((__mmask32)(__U),
1416+
(__v32hf)_mm512_sqrt_ph(__A),
1417+
(__v32hf)_mm512_setzero_ph());
14221418
}
14231419

14241420
#define _mm_sqrt_round_sh(A, B, R) \

clang/lib/Headers/cpuid.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,6 @@
253253
#define bit_RDPRU 0x00000010
254254
#define bit_WBNOINVD 0x00000200
255255

256-
/* Features in %ebx for leaf 0x24 */
257-
#define bit_AVX10_256 0x00020000
258-
#define bit_AVX10_512 0x00040000
259-
260256
#ifdef __i386__
261257
#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \
262258
__asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \

0 commit comments

Comments
 (0)