Skip to content

Commit 91b9d79

Browse files
authored
Merge branch 'main' into cindex_inline
2 parents 674bdfb + 8aa4997 commit 91b9d79

File tree

14 files changed

+497
-25
lines changed

14 files changed

+497
-25
lines changed

clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/pro-bounds-constant-array-index-c++03.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
// RUN: %check_clang_tidy -std=c++98-or-later %s cppcoreguidelines-pro-bounds-constant-array-index %t
22

3-
// Note: this test expects no diagnostics, but FileCheck cannot handle that,
4-
// hence the use of | count 0.
53
template <int index> struct B {
64
int get() {
75
// The next line used to crash the check (in C++03 mode only).

libcxx/utils/ci/buildkite-pipeline.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ steps:
103103
queue: libcxx-builders
104104
os: aix
105105
<<: *common
106-
skip: "Until https://github.com/llvm/llvm-project/issues/162516 has been resolved"
106+
skip: "https://github.com/llvm/llvm-project/issues/162516"
107107

108108
- label: AIX (64-bit)
109109
command: libcxx/utils/ci/run-buildbot aix
@@ -115,7 +115,7 @@ steps:
115115
queue: libcxx-builders
116116
os: aix
117117
<<: *common
118-
skip: "Until https://github.com/llvm/llvm-project/issues/162516 has been resolved"
118+
skip: "https://github.com/llvm/llvm-project/issues/162516"
119119

120120
- group: ':freebsd: FreeBSD'
121121
steps:

llvm/include/llvm/IR/Instructions.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1715,7 +1715,7 @@ class SelectInst : public Instruction {
17151715
static SelectInst *Create(Value *C, Value *S1, Value *S2,
17161716
const Twine &NameStr = "",
17171717
InsertPosition InsertBefore = nullptr,
1718-
Instruction *MDFrom = nullptr) {
1718+
const Instruction *MDFrom = nullptr) {
17191719
SelectInst *Sel =
17201720
new (AllocMarker) SelectInst(C, S1, S2, NameStr, InsertBefore);
17211721
if (MDFrom)

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -880,11 +880,13 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
880880
// zext(bool) + C -> bool ? C + 1 : C
881881
if (match(Op0, m_ZExt(m_Value(X))) &&
882882
X->getType()->getScalarSizeInBits() == 1)
883-
return createSelectInst(X, InstCombiner::AddOne(Op1C), Op1);
883+
return createSelectInstWithUnknownProfile(X, InstCombiner::AddOne(Op1C),
884+
Op1);
884885
// sext(bool) + C -> bool ? C - 1 : C
885886
if (match(Op0, m_SExt(m_Value(X))) &&
886887
X->getType()->getScalarSizeInBits() == 1)
887-
return createSelectInst(X, InstCombiner::SubOne(Op1C), Op1);
888+
return createSelectInstWithUnknownProfile(X, InstCombiner::SubOne(Op1C),
889+
Op1);
888890

889891
// ~X + C --> (C-1) - X
890892
if (match(Op0, m_Not(m_Value(X)))) {

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -471,15 +471,16 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
471471
Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable,
472472
unsigned Depth = 0);
473473

474-
SelectInst *createSelectInst(Value *C, Value *S1, Value *S2,
475-
const Twine &NameStr = "",
476-
InsertPosition InsertBefore = nullptr,
477-
Instruction *MDFrom = nullptr) {
478-
SelectInst *SI =
479-
SelectInst::Create(C, S1, S2, NameStr, InsertBefore, MDFrom);
480-
if (!MDFrom)
481-
setExplicitlyUnknownBranchWeightsIfProfiled(*SI, F, DEBUG_TYPE);
482-
return SI;
474+
/// Create `select C, S1, S2`. Use only when the profile cannot be calculated
475+
/// from existing profile metadata: if the Function has profiles, this will
476+
/// set the profile of this select to "unknown".
477+
SelectInst *
478+
createSelectInstWithUnknownProfile(Value *C, Value *S1, Value *S2,
479+
const Twine &NameStr = "",
480+
InsertPosition InsertBefore = nullptr) {
481+
auto *Sel = SelectInst::Create(C, S1, S2, NameStr, InsertBefore, nullptr);
482+
setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, F, DEBUG_TYPE);
483+
return Sel;
483484
}
484485

485486
public:

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1253,7 +1253,8 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
12531253
// shl (zext i1 X), C1 --> select (X, 1 << C1, 0)
12541254
if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
12551255
auto *NewC = Builder.CreateShl(ConstantInt::get(Ty, 1), C1);
1256-
return createSelectInst(X, NewC, ConstantInt::getNullValue(Ty));
1256+
return createSelectInstWithUnknownProfile(X, NewC,
1257+
ConstantInt::getNullValue(Ty));
12571258
}
12581259
}
12591260

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1737,7 +1737,7 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
17371737
Constant *Zero = ConstantInt::getNullValue(BO.getType());
17381738
Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
17391739
Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1740-
return createSelectInst(X, TVal, FVal);
1740+
return createSelectInstWithUnknownProfile(X, TVal, FVal);
17411741
}
17421742

17431743
static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI,

mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ void populateXeGPUFoldAliasOpsPatterns(RewritePatternSet &patterns);
6464

6565
/// Appends patterns for XeGPU SIMT distribution into `patterns`.
6666
void populateXeGPUSubgroupDistributePatterns(RewritePatternSet &patterns);
67+
/// Appends patterns for moving a function body into a
/// gpu.warp_execute_on_lane_0 op into `patterns`.
68+
void populateXeGPUMoveFuncBodyToWarpOpPatterns(RewritePatternSet &patterns);
69+
/// Appends patterns for XeGPU workgroup to subgroup distribution into
70+
/// `patterns`.
6771
void populateXeGPUWgToSgDistributePatterns(RewritePatternSet &patterns);
6872

6973
/// Collect a set of patterns to unroll xegpu operations to a smaller shapes.

mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp

Lines changed: 145 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,135 @@ class LLVMLoadStoreToOCLPattern : public OpConversionPattern<OpType> {
714714
}
715715
};
716716

717+
//===----------------------------------------------------------------------===//
718+
// GPU index id operations
719+
//===----------------------------------------------------------------------===//
720+
/*
721+
// Launch Config ops
722+
// dimidx - x, y, z - is fixed to i32
723+
// return type is set by XeVM type converter
724+
// get_local_id
725+
xevm::WorkitemIdXOp;
726+
xevm::WorkitemIdYOp;
727+
xevm::WorkitemIdZOp;
728+
// get_local_size
729+
xevm::WorkgroupDimXOp;
730+
xevm::WorkgroupDimYOp;
731+
xevm::WorkgroupDimZOp;
732+
// get_group_id
733+
xevm::WorkgroupIdXOp;
734+
xevm::WorkgroupIdYOp;
735+
xevm::WorkgroupIdZOp;
736+
// get_num_groups
737+
xevm::GridDimXOp;
738+
xevm::GridDimYOp;
739+
xevm::GridDimZOp;
740+
// get_global_id : to be added if needed
741+
*/
742+
743+
// Helpers to get the OpenCL function name and dimension argument for each op.
744+
static std::pair<StringRef, int64_t> getConfig(xevm::WorkitemIdXOp) {
745+
return {"get_local_id", 0};
746+
}
747+
static std::pair<StringRef, int64_t> getConfig(xevm::WorkitemIdYOp) {
748+
return {"get_local_id", 1};
749+
}
750+
static std::pair<StringRef, int64_t> getConfig(xevm::WorkitemIdZOp) {
751+
return {"get_local_id", 2};
752+
}
753+
static std::pair<StringRef, int64_t> getConfig(xevm::WorkgroupDimXOp) {
754+
return {"get_local_size", 0};
755+
}
756+
static std::pair<StringRef, int64_t> getConfig(xevm::WorkgroupDimYOp) {
757+
return {"get_local_size", 1};
758+
}
759+
static std::pair<StringRef, int64_t> getConfig(xevm::WorkgroupDimZOp) {
760+
return {"get_local_size", 2};
761+
}
762+
static std::pair<StringRef, int64_t> getConfig(xevm::WorkgroupIdXOp) {
763+
return {"get_group_id", 0};
764+
}
765+
static std::pair<StringRef, int64_t> getConfig(xevm::WorkgroupIdYOp) {
766+
return {"get_group_id", 1};
767+
}
768+
static std::pair<StringRef, int64_t> getConfig(xevm::WorkgroupIdZOp) {
769+
return {"get_group_id", 2};
770+
}
771+
static std::pair<StringRef, int64_t> getConfig(xevm::GridDimXOp) {
772+
return {"get_num_groups", 0};
773+
}
774+
static std::pair<StringRef, int64_t> getConfig(xevm::GridDimYOp) {
775+
return {"get_num_groups", 1};
776+
}
777+
static std::pair<StringRef, int64_t> getConfig(xevm::GridDimZOp) {
778+
return {"get_num_groups", 2};
779+
}
780+
/// Replace `xevm.*` with an `llvm.call` to the corresponding OpenCL func with
781+
/// a constant argument for the dimension - x, y or z.
782+
template <typename OpType>
783+
class LaunchConfigOpToOCLPattern : public OpConversionPattern<OpType> {
784+
using OpConversionPattern<OpType>::OpConversionPattern;
785+
LogicalResult
786+
matchAndRewrite(OpType op, typename OpType::Adaptor adaptor,
787+
ConversionPatternRewriter &rewriter) const override {
788+
Location loc = op->getLoc();
789+
auto [baseName, dim] = getConfig(op);
790+
Type dimTy = rewriter.getI32Type();
791+
Value dimVal = LLVM::ConstantOp::create(rewriter, loc, dimTy,
792+
static_cast<int64_t>(dim));
793+
std::string func = mangle(baseName, {dimTy}, {true});
794+
Type resTy = op.getType();
795+
auto call =
796+
createDeviceFunctionCall(rewriter, func, resTy, {dimTy}, {dimVal}, {},
797+
noUnwindWillReturnAttrs, op.getOperation());
798+
constexpr auto noModRef = LLVM::ModRefInfo::NoModRef;
799+
auto memAttr = rewriter.getAttr<LLVM::MemoryEffectsAttr>(
800+
/*other=*/noModRef,
801+
/*argMem=*/noModRef, /*inaccessibleMem=*/noModRef);
802+
call.setMemoryEffectsAttr(memAttr);
803+
rewriter.replaceOp(op, call);
804+
return success();
805+
}
806+
};
807+
808+
/*
809+
// Subgroup ops
810+
// get_sub_group_local_id
811+
xevm::LaneIdOp;
812+
// get_sub_group_id
813+
xevm::SubgroupIdOp;
814+
// get_sub_group_size
815+
xevm::SubgroupSizeOp;
816+
// get_num_sub_groups : to be added if needed
817+
*/
818+
819+
// Helpers to get the OpenCL function name for each op.
820+
// Each overload maps one subgroup op type to the name of the OpenCL function
// it lowers to. The op argument is used only for overload selection.
static StringRef getConfig(xevm::LaneIdOp /*op*/) {
  return StringRef("get_sub_group_local_id");
}
static StringRef getConfig(xevm::SubgroupIdOp /*op*/) {
  return StringRef("get_sub_group_id");
}
static StringRef getConfig(xevm::SubgroupSizeOp /*op*/) {
  return StringRef("get_sub_group_size");
}
825+
template <typename OpType>
826+
class SubgroupOpWorkitemOpToOCLPattern : public OpConversionPattern<OpType> {
827+
using OpConversionPattern<OpType>::OpConversionPattern;
828+
LogicalResult
829+
matchAndRewrite(OpType op, typename OpType::Adaptor adaptor,
830+
ConversionPatternRewriter &rewriter) const override {
831+
std::string func = mangle(getConfig(op).str(), {});
832+
Type resTy = op.getType();
833+
auto call =
834+
createDeviceFunctionCall(rewriter, func, resTy, {}, {}, {},
835+
noUnwindWillReturnAttrs, op.getOperation());
836+
constexpr auto noModRef = LLVM::ModRefInfo::NoModRef;
837+
auto memAttr = rewriter.getAttr<LLVM::MemoryEffectsAttr>(
838+
/*other=*/noModRef,
839+
/*argMem=*/noModRef, /*inaccessibleMem=*/noModRef);
840+
call.setMemoryEffectsAttr(memAttr);
841+
rewriter.replaceOp(op, call);
842+
return success();
843+
}
844+
};
845+
717846
//===----------------------------------------------------------------------===//
718847
// Pass Definition
719848
//===----------------------------------------------------------------------===//
@@ -775,7 +904,22 @@ void ::mlir::populateXeVMToLLVMConversionPatterns(ConversionTarget &target,
775904
LLVMLoadStoreToOCLPattern<LLVM::LoadOp>,
776905
LLVMLoadStoreToOCLPattern<LLVM::StoreOp>,
777906
BlockLoadStore1DToOCLPattern<BlockLoadOp>,
778-
BlockLoadStore1DToOCLPattern<BlockStoreOp>>(
907+
BlockLoadStore1DToOCLPattern<BlockStoreOp>,
908+
LaunchConfigOpToOCLPattern<WorkitemIdXOp>,
909+
LaunchConfigOpToOCLPattern<WorkitemIdYOp>,
910+
LaunchConfigOpToOCLPattern<WorkitemIdZOp>,
911+
LaunchConfigOpToOCLPattern<WorkgroupDimXOp>,
912+
LaunchConfigOpToOCLPattern<WorkgroupDimYOp>,
913+
LaunchConfigOpToOCLPattern<WorkgroupDimZOp>,
914+
LaunchConfigOpToOCLPattern<WorkgroupIdXOp>,
915+
LaunchConfigOpToOCLPattern<WorkgroupIdYOp>,
916+
LaunchConfigOpToOCLPattern<WorkgroupIdZOp>,
917+
LaunchConfigOpToOCLPattern<GridDimXOp>,
918+
LaunchConfigOpToOCLPattern<GridDimYOp>,
919+
LaunchConfigOpToOCLPattern<GridDimZOp>,
920+
SubgroupOpWorkitemOpToOCLPattern<LaneIdOp>,
921+
SubgroupOpWorkitemOpToOCLPattern<SubgroupIdOp>,
922+
SubgroupOpWorkitemOpToOCLPattern<SubgroupSizeOp>>(
779923
patterns.getContext());
780924
}
781925

mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,8 +195,7 @@ static bool requireTranspose(const xegpu::LayoutAttr layout,
195195
/// }
196196
/// return %0
197197
/// }
198-
struct MoveFuncBodyToWarpExecuteOnLane0
199-
: public OpRewritePattern<gpu::GPUFuncOp> {
198+
struct MoveFuncBodyToWarpOp : public OpRewritePattern<gpu::GPUFuncOp> {
200199
using OpRewritePattern<gpu::GPUFuncOp>::OpRewritePattern;
201200
LogicalResult matchAndRewrite(gpu::GPUFuncOp gpuFuncOp,
202201
PatternRewriter &rewriter) const override {
@@ -1447,6 +1446,11 @@ void xegpu::populateXeGPUSubgroupDistributePatterns(
14471446
/*pattern benefit=*/highPatternBenefit);
14481447
}
14491448

1449+
/// Adds the MoveFuncBodyToWarpOp rewrite pattern to `patterns`, constructed
/// with the context that `patterns` itself reports.
void xegpu::populateXeGPUMoveFuncBodyToWarpOpPatterns(
    RewritePatternSet &patterns) {
  auto *context = patterns.getContext();
  patterns.add<MoveFuncBodyToWarpOp>(context);
}
1453+
14501454
void XeGPUSubgroupDistributePass::runOnOperation() {
14511455
// Step 1: Attach layouts to op operands.
14521456
// TODO: Following assumptions are made:
@@ -1473,7 +1477,7 @@ void XeGPUSubgroupDistributePass::runOnOperation() {
14731477
// gpu.warp_execute_on_lane_0 operation.
14741478
{
14751479
RewritePatternSet patterns(&getContext());
1476-
patterns.add<MoveFuncBodyToWarpExecuteOnLane0>(&getContext());
1480+
xegpu::populateXeGPUMoveFuncBodyToWarpOpPatterns(patterns);
14771481

14781482
if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) {
14791483
signalPassFailure();

0 commit comments

Comments
 (0)