Skip to content

Commit 4584408

Browse files
authored
Merge branch 'main' into fortify_strcat
2 parents 8eea492 + 925ce5a commit 4584408

File tree

10 files changed

+230
-14
lines changed

10 files changed

+230
-14
lines changed

clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ dependencies::createCompilerInvocation(ArrayRef<std::string> CommandLine,
432432
}
433433

434434
std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>>
435-
dependencies::initVFSForTUBuferScanning(
435+
dependencies::initVFSForTUBufferScanning(
436436
IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
437437
ArrayRef<std::string> CommandLine, StringRef WorkingDirectory,
438438
llvm::MemoryBufferRef TUBuffer) {

clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,10 @@ createCompilerInvocation(ArrayRef<std::string> CommandLine,
113113
DiagnosticsEngine &Diags);
114114

115115
std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>>
116-
initVFSForTUBuferScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
117-
ArrayRef<std::string> CommandLine,
118-
StringRef WorkingDirectory,
119-
llvm::MemoryBufferRef TUBuffer);
116+
initVFSForTUBufferScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
117+
ArrayRef<std::string> CommandLine,
118+
StringRef WorkingDirectory,
119+
llvm::MemoryBufferRef TUBuffer);
120120

121121
std::pair<IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem>,
122122
std::vector<std::string>>

clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ bool DependencyScanningWorker::computeDependencies(
157157
DependencyConsumer &Consumer, DependencyActionController &Controller,
158158
DiagnosticConsumer &DC, std::optional<llvm::MemoryBufferRef> TUBuffer) {
159159
if (TUBuffer) {
160-
auto [FinalFS, FinalCommandLine] = initVFSForTUBuferScanning(
160+
auto [FinalFS, FinalCommandLine] = initVFSForTUBufferScanning(
161161
BaseFS, CommandLine, WorkingDirectory, *TUBuffer);
162162
return scanDependencies(WorkingDirectory, FinalCommandLine, Consumer,
163163
Controller, DC, FinalFS);

lldb/docs/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ if (LLDB_ENABLE_PYTHON AND SPHINX_FOUND)
3434
COMMAND "${CMAKE_COMMAND}" -E copy "${LLDB_SOURCE_DIR}/examples/python/templates/scripted_thread_plan.py" "${CMAKE_CURRENT_BINARY_DIR}/lldb/plugins/"
3535
COMMENT "Copying lldb.py to pretend its a Python package.")
3636

37-
add_dependencies(lldb-python-doc-package swig_wrapper_python lldb-python)
37+
add_dependencies(lldb-python-doc-package swig_wrapper_python)
3838

3939
# FIXME: Don't treat Sphinx warnings as errors. The files generated by
4040
# automodapi are full of warnings (partly caused by SWIG, our documentation

llvm/lib/Target/RISCV/RISCVInstrInfoP.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1461,7 +1461,7 @@ let Predicates = [HasStdExtP, IsRV32] in {
14611461
// Codegen patterns
14621462
//===----------------------------------------------------------------------===//
14631463

1464-
def riscv_absw : RVSDNode<"ABSW", SDTIntUnaryOp>;
1464+
def riscv_absw : RVSDNode<"ABSW", SDT_RISCVIntUnaryOpW>;
14651465

14661466
def SDT_RISCVPASUB : SDTypeProfile<1, 2, [SDTCisVec<0>,
14671467
SDTCisInt<0>,

llvm/lib/Target/RISCV/RISCVRegisterInfo.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,8 @@ def XLenVecI8VT : ValueTypeByHwMode<[RV32, RV64],
228228
[v4i8, v8i8]>;
229229
def XLenVecI16VT : ValueTypeByHwMode<[RV32, RV64],
230230
[v2i16, v4i16]>;
231+
def XLenVecI32VT : ValueTypeByHwMode<[RV64],
232+
[v2i32]>;
231233
def XLenRI : RegInfoByHwMode<
232234
[RV32, RV64],
233235
[RegInfo<32,32,32>, RegInfo<64,64,64>]>;
@@ -246,7 +248,7 @@ class RISCVRegisterClass<list<ValueType> regTypes, int align, dag regList>
246248
class GPRRegisterClass<dag regList>
247249
: RISCVRegisterClass<[XLenVT, XLenFVT,
248250
// P extension packed vector types:
249-
XLenVecI8VT, XLenVecI16VT, v2i32], 32, regList> {
251+
XLenVecI8VT, XLenVecI16VT, XLenVecI32VT], 32, regList> {
250252
let RegInfos = XLenRI;
251253
}
252254

llvm/test/Transforms/LoopUnrollAndJam/dependencies.ll

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
88
; CHECK: %j.1 = phi
99
; CHECK: %j.2 = phi
1010
; CHECK: %j.3 = phi
11+
;
12+
; fore_aft_less SHOULD be unroll-and-jammed (count=4) as it's safe.
13+
; Memory accesses:
14+
; - Fore block: A[i] = 1 (write in outer loop before inner)
15+
; - Aft block: A[i-1] = sum (write in outer loop after inner)
16+
; No dependency conflict: The fore block write A[i] and aft block write A[i-1]
17+
; access different array elements, so unrolling the outer loop and jamming the
18+
; inner loop is safe. The backward dependency (i-1) doesn't create conflicts
19+
; between different unrolled iterations.
1120
define void @fore_aft_less(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
1221
entry:
1322
%cmp = icmp sgt i32 %N, 0
@@ -48,6 +57,15 @@ cleanup:
4857
; CHECK: %j.1 = phi
4958
; CHECK: %j.2 = phi
5059
; CHECK: %j.3 = phi
60+
;
61+
; fore_aft_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
62+
; Memory accesses:
63+
; - Fore block: A[i] = 1 (write in outer loop before inner)
64+
; - Aft block: A[i] = sum (write in outer loop after inner)
65+
; Dependency conflict: Both fore and aft blocks write to A[i], creating a
66+
; write-after-write (WAW) dependency. However, this is safe for unroll-and-jam
67+
; because the aft block write always happens after the fore block write in
68+
; the same iteration, preserving the original execution order.
5169
define void @fore_aft_eq(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
5270
entry:
5371
%cmp = icmp sgt i32 %N, 0
@@ -86,6 +104,15 @@ cleanup:
86104
; CHECK-LABEL: fore_aft_more
87105
; CHECK: %j = phi
88106
; CHECK-NOT: %j.1 = phi
107+
;
108+
; fore_aft_more should NOT be unroll-and-jammed due to a dependency violation.
109+
; Memory accesses:
110+
; - Fore block: A[i] = 1 (write in outer loop before inner)
111+
; - Aft block: A[i+1] = sum (write in outer loop after inner)
112+
; Dependency conflict: The fore block writes A[i] and aft block writes A[i+1].
113+
; When unroll-and-jamming, iteration i's aft block writes A[i+1] which conflicts
114+
; with iteration i+1's fore block write to A[i+1], creating a write-after-write
115+
; ordering hazard (the two writes get reordered) that violates the original sequential semantics.
89116
define void @fore_aft_more(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
90117
entry:
91118
%cmp = icmp sgt i32 %N, 0
@@ -126,6 +153,14 @@ cleanup:
126153
; CHECK: %j.1 = phi
127154
; CHECK: %j.2 = phi
128155
; CHECK: %j.3 = phi
156+
;
157+
; fore_sub_less SHOULD be unroll-and-jammed (count=4) as it's safe.
158+
; Memory accesses:
159+
; - Fore block: A[i] = 1 (write in outer loop before inner)
160+
; - Sub block: A[i-1] = sum (write inside inner loop)
161+
; No dependency conflict: The fore block writes A[i] and sub block writes A[i-1].
162+
; These access different array elements, so unroll-and-jam is safe. The backward
163+
; dependency pattern doesn't create conflicts between unrolled iterations.
129164
define void @fore_sub_less(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
130165
entry:
131166
%cmp = icmp sgt i32 %N, 0
@@ -166,6 +201,15 @@ cleanup:
166201
; CHECK: %j.1 = phi
167202
; CHECK: %j.2 = phi
168203
; CHECK: %j.3 = phi
204+
;
205+
; fore_sub_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
206+
; Memory accesses:
207+
; - Fore block: A[i] = 1 (write in outer loop before inner)
208+
; - Sub block: A[i] = sum (write inside inner loop)
209+
; Dependency conflict: Both fore and sub blocks write to A[i], creating a
210+
; write-after-write (WAW) dependency. However, this is safe for unroll-and-jam
211+
; because the execution order is preserved: fore block executes first, then
212+
; the entire inner loop (sub block) executes, maintaining the original semantics.
169213
define void @fore_sub_eq(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
170214
entry:
171215
%cmp = icmp sgt i32 %N, 0
@@ -204,6 +248,15 @@ cleanup:
204248
; CHECK-LABEL: fore_sub_more
205249
; CHECK: %j = phi
206250
; CHECK-NOT: %j.1 = phi
251+
;
252+
; fore_sub_more should NOT be unroll-and-jammed due to a dependency violation.
253+
; Memory accesses:
254+
; - Fore block: A[i] = 1 (write in outer loop before inner)
255+
; - Sub block: A[i+1] = sum (write inside inner loop)
256+
; Dependency conflict: The fore block writes A[i] and sub block writes A[i+1].
257+
; When unroll-and-jamming, iteration i's fore block writes A[i] but iteration i's
258+
; sub block writes A[i+1]. This conflicts with iteration i+1's fore block write
259+
; to A[i+1], creating a write-after-write ordering hazard (the writes get reordered).
207260
define void @fore_sub_more(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
208261
entry:
209262
%cmp = icmp sgt i32 %N, 0
@@ -244,6 +297,14 @@ cleanup:
244297
; CHECK: %j.1 = phi
245298
; CHECK: %j.2 = phi
246299
; CHECK: %j.3 = phi
300+
;
301+
; sub_aft_less SHOULD be unroll-and-jammed (count=4) as it's safe.
302+
; Memory accesses:
303+
; - Sub block: A[i] = 1 (write inside inner loop)
304+
; - Aft block: A[i-1] = sum (write in outer loop after inner)
305+
; No dependency conflict: The sub block writes A[i] and aft block writes A[i-1].
306+
; These access different array elements, so unroll-and-jam is safe. The backward
307+
; dependency pattern doesn't create conflicts between unrolled iterations.
247308
define void @sub_aft_less(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
248309
entry:
249310
%cmp = icmp sgt i32 %N, 0
@@ -284,6 +345,15 @@ cleanup:
284345
; CHECK: %j.1 = phi
285346
; CHECK: %j.2 = phi
286347
; CHECK: %j.3 = phi
348+
;
349+
; sub_aft_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
350+
; Memory accesses:
351+
; - Sub block: A[i] = 1 (write inside inner loop)
352+
; - Aft block: A[i] = sum (write in outer loop after inner)
353+
; Dependency conflict: Both sub and aft blocks write to A[i], creating a
354+
; write-after-write (WAW) dependency. However, this is safe for unroll-and-jam
355+
; because the execution order is preserved: the entire inner loop (sub block)
356+
; executes first, then the aft block executes, maintaining original semantics.
287357
define void @sub_aft_eq(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
288358
entry:
289359
%cmp = icmp sgt i32 %N, 0
@@ -322,6 +392,15 @@ cleanup:
322392
; CHECK-LABEL: sub_aft_more
323393
; CHECK: %j = phi
324394
; CHECK-NOT: %j.1 = phi
395+
;
396+
; sub_aft_more should NOT be unroll-and-jammed due to a dependency violation.
397+
; Memory accesses:
398+
; - Sub block: A[i] = 1 (write inside inner loop)
399+
; - Aft block: A[i+1] = sum (write in outer loop after inner)
400+
; Dependency conflict: The sub block writes A[i] and aft block writes A[i+1].
401+
; When unroll-and-jamming, iteration i's aft block writes A[i+1] which conflicts
402+
; with iteration i+1's sub block write to A[i+1], creating a write-after-write
403+
; ordering hazard (the two writes get reordered) that violates the original sequential semantics.
325404
define void @sub_aft_more(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
326405
entry:
327406
%cmp = icmp sgt i32 %N, 0
@@ -360,6 +439,15 @@ cleanup:
360439
; CHECK-LABEL: sub_sub_less
361440
; CHECK: %j = phi
362441
; CHECK-NOT: %j.1 = phi
442+
;
443+
; sub_sub_less should NOT be unroll-and-jammed due to a dependency violation.
444+
; Memory accesses:
445+
; - Sub block: A[i] = 1 (write inside inner loop)
446+
; - Sub block: A[i-1] = sum (write inside inner loop)
447+
; Dependency conflict: Both writes are in the sub block (inner loop), accessing
448+
; A[i] and A[i-1]. When unroll-and-jamming, the inner loop is jammed, meaning
449+
; iterations of the inner loop from different outer iterations execute together.
450+
; This creates a backward dependency whose accesses jamming could reorder.
363451
define void @sub_sub_less(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
364452
entry:
365453
%cmp = icmp sgt i32 %N, 0
@@ -400,6 +488,15 @@ cleanup:
400488
; CHECK: %j.1 = phi
401489
; CHECK: %j.2 = phi
402490
; CHECK: %j.3 = phi
491+
;
492+
; sub_sub_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
493+
; Memory accesses:
494+
; - Sub block: A[i] = 1 (write inside inner loop)
495+
; - Sub block: A[i] = sum (write inside inner loop)
496+
; Dependency conflict: Both writes are to A[i] within the sub block, creating a
497+
; write-after-write (WAW) dependency. However, this is safe for unroll-and-jam
498+
; because both writes are in the same basic block and maintain their relative
499+
; order: A[i] = 1 always executes before A[i] = sum in each iteration.
403500
define void @sub_sub_eq(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
404501
entry:
405502
%cmp = icmp sgt i32 %N, 0
@@ -438,6 +535,15 @@ cleanup:
438535
; CHECK-LABEL: sub_sub_more
439536
; CHECK: %j = phi
440537
; CHECK-NOT: %j.1 = phi
538+
;
539+
; sub_sub_more should NOT be unroll-and-jammed due to a dependency violation.
540+
; Memory accesses:
541+
; - Sub block: A[i] = 1 (write inside inner loop)
542+
; - Sub block: A[i+1] = sum (write inside inner loop)
543+
; Dependency conflict: Both writes are in the sub block, accessing A[i] and A[i+1].
544+
; When unroll-and-jamming, iteration i's sub block writes A[i+1] which conflicts
545+
; with iteration i+1's sub block write to A[i+1]. This creates a forward
546+
; dependency that causes write-after-write ordering hazards (reordered writes).
441547
define void @sub_sub_more(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
442548
entry:
443549
%cmp = icmp sgt i32 %N, 0

mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp

Lines changed: 70 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1285,6 +1285,71 @@ struct WgToSgVectorTransposeOp
12851285
}
12861286
};
12871287

1288+
// This pattern distributes the vector.constant_mask ops to work at subgroup
1289+
// level.
1290+
struct WgToSgVectorConstantMaskOp
1291+
: public OpConversionPattern<vector::ConstantMaskOp> {
1292+
using OpConversionPattern<vector::ConstantMaskOp>::OpConversionPattern;
1293+
1294+
LogicalResult
1295+
matchAndRewrite(vector::ConstantMaskOp op, OneToNOpAdaptor adaptor,
1296+
ConversionPatternRewriter &rewriter) const override {
1297+
xegpu::DistributeLayoutAttr layout =
1298+
xegpu::getDistributeLayoutAttr(op.getResult());
1299+
if (!layout || !layout.isForWorkgroup())
1300+
return failure();
1301+
1302+
Location loc = op.getLoc();
1303+
VectorType type = op.getResult().getType();
1304+
auto wgShape = type.getShape();
1305+
1306+
ArrayRef<int64_t> wgMaskDimSizes = op.getMaskDimSizes();
1307+
1308+
// Get subgroup ID.
1309+
Value sgId =
1310+
gpu::SubgroupIdOp::create(rewriter, loc, /*upper_bound=*/nullptr);
1311+
auto sgOffsets =
1312+
layout.computeDistributedCoords(rewriter, loc, sgId, wgShape);
1313+
if (failed(sgOffsets))
1314+
return failure();
1315+
1316+
SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
1317+
VectorType resultType = VectorType::get(sgShape, type.getElementType());
1318+
1319+
// In each dimension, each subgroup computes its local mask size as:
1320+
// min(max(wgMaskSize[d] - offset[d], 0), sgDimSize[d])
1321+
SmallVector<Value> newCreateMaskOps;
1322+
for (auto offsetSet : *sgOffsets) {
1323+
SmallVector<Value> maskOperands;
1324+
1325+
for (auto [i, wgMaskSize] : llvm::enumerate(wgMaskDimSizes)) {
1326+
Value wgMaskSizeVal =
1327+
arith::ConstantIndexOp::create(rewriter, loc, wgMaskSize);
1328+
Value dimSizeVal =
1329+
arith::ConstantIndexOp::create(rewriter, loc, sgShape[i]);
1330+
Value offset = offsetSet[i];
1331+
Value adjustedMaskSize =
1332+
arith::SubIOp::create(rewriter, loc, wgMaskSizeVal, offset);
1333+
Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
1334+
Value nonNegative =
1335+
arith::MaxSIOp::create(rewriter, loc, adjustedMaskSize, zero);
1336+
Value sgMaskSize =
1337+
arith::MinSIOp::create(rewriter, loc, nonNegative, dimSizeVal);
1338+
maskOperands.push_back(sgMaskSize);
1339+
}
1340+
1341+
auto newCreateMaskOp =
1342+
vector::CreateMaskOp::create(rewriter, loc, resultType, maskOperands);
1343+
xegpu::setDistributeLayoutAttr(newCreateMaskOp->getResult(0),
1344+
layout.dropSgLayoutAndData());
1345+
newCreateMaskOps.push_back(newCreateMaskOp.getResult());
1346+
}
1347+
1348+
rewriter.replaceOpWithMultiple(op, {newCreateMaskOps});
1349+
return success();
1350+
}
1351+
};
1352+
12881353
} // namespace
12891354

12901355
namespace mlir {
@@ -1299,8 +1364,8 @@ void populateXeGPUWgToSgDistributePatterns(RewritePatternSet &patterns) {
12991364
WgToSgArithConstantOp, WgToSgLoadGatherOpWithOffset,
13001365
WgToSgStoreScatterOpWithOffset, WgToSgLoadMatrixOp,
13011366
WgToSgStoreMatrixOp, WgToSgVectorStepOp, WgToSgVectorShapeCastOp,
1302-
WgToSgMultiDimReductionOp, WgToSgVectorTransposeOp>(
1303-
patterns.getContext());
1367+
WgToSgMultiDimReductionOp, WgToSgVectorTransposeOp,
1368+
WgToSgVectorConstantMaskOp>(patterns.getContext());
13041369
}
13051370
} // namespace xegpu
13061371
} // namespace mlir
@@ -1427,9 +1492,9 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
14271492
return isLegal(layout);
14281493
});
14291494

1430-
target.addDynamicallyLegalOp<vector::ShapeCastOp, vector::StepOp,
1431-
vector::TransposeOp, vector::BroadcastOp,
1432-
vector::MultiDimReductionOp>(
1495+
target.addDynamicallyLegalOp<
1496+
vector::ShapeCastOp, vector::StepOp, vector::TransposeOp,
1497+
vector::BroadcastOp, vector::MultiDimReductionOp, vector::ConstantMaskOp>(
14331498
[=](Operation *op) -> bool {
14341499
// Check for either a SliceAttr or LayoutAttr on the result.
14351500
auto layout = xegpu::getDistributeLayoutAttr(op->getResult(0));

mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops-rr.mlir

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,5 +130,13 @@ gpu.module @test_distribution {
130130
%trans = vector.transpose %load, [1, 0] {layout_result_0 = #xegpu.layout<sg_layout = [4, 8], sg_data = [16, 32], lane_layout = [1, 16], lane_data = [1, 1], order =[1, 0]>} : vector<256x128xf32> to vector<128x256xf32>
131131
gpu.return
132132
}
133+
134+
// CHECK-LABEL: vector_mask_2D
135+
gpu.func @vector_mask_2D() {
136+
// CHECK-COUNT-4: vector.create_mask {{.*}}, {{.*}} : vector<16x16xi1>
137+
// CHECK-NOT: vector.create_mask
138+
%constant_mask = vector.constant_mask [16, 16] {layout_result_0 = #xegpu.layout<sg_layout = [8, 4], sg_data = [16, 16]>} : vector<256x128xi1>
139+
gpu.return
140+
}
133141
}
134142

0 commit comments

Comments
 (0)