Skip to content

Commit 22b7ec9

Browse files
committed
Reduce PR footprint
Signed-off-by: Tiotto, Ettore <[email protected]>
1 parent ae3d625 commit 22b7ec9

File tree

2 files changed

+10
-74
lines changed

third_party/intel/lib/TritonIntelGPUTransforms/RemoveLayoutConversions.cpp

Lines changed: 2 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -305,29 +305,8 @@ bool hasConvertToMMATransisitiveUse(Operation *op, Attribute encoding) {
305305
// Return true if the op is an op with a layout we don't want to change. We will
306306
// propagate the layout starting from anchor ops.
307307
bool isLayoutAnchor(Operation *op) {
308-
if (isa<LoadOp>(op)) {
309-
#ifdef HACK
310-
// Note: currently block ptr loads are always considered not expensive and
311-
// therefore they are never layout anchors.
312-
Value base = op->getOperand(0);
313-
auto parentLoop = op->getParentOfType<scf::ForOp>();
314-
bool isInLoop = parentLoop != nullptr;
315-
bool isTensorPtrLoad = mlir::triton::isTensorPointerType(base.getType());
316-
317-
if (!isTensorPtrLoad)
318-
ttgi::isExpensiveLoadOrStore(op);
319-
320-
// HACK: consider block ptr loads expensive if they are in a loop.
321-
return isInLoop;
322-
#else
308+
if (isa<LoadOp, StoreOp>(op))
323309
return ttgi::isExpensiveLoadOrStore(op);
324-
#endif
325-
}
326-
327-
if (isa<StoreOp>(op)) {
328-
return ttgi::isExpensiveLoadOrStore(op);
329-
}
330-
331310
if (isa<DotOp, AtomicCASOp>(op))
332311
return true;
333312
if (isa<AtomicRMWOp>(op))
@@ -377,17 +356,6 @@ void LayoutPropagation::initAnchorLayout() {
377356
}
378357
}
379358
});
380-
381-
#if 0
382-
llvm::errs() << "Initial layouts:\n";
383-
for (auto &entry : layouts) {
384-
llvm::errs() << entry.first << "\n";
385-
for (auto &layout : entry.second.encodings) {
386-
llvm::errs() << " " << layout << "\n";
387-
}
388-
}
389-
llvm::errs() << "\n\n";
390-
#endif
391359
}
392360

393361
void LayoutPropagation::setEncoding(ValueRange values, LayoutInfo &info,
@@ -1001,28 +969,8 @@ Operation *LayoutPropagation::rewriteOp(Operation *op) {
1001969
}
1002970

1003971
bool canBeRemat(Operation *op) {
1004-
if (isa<LoadOp>(op)) {
1005-
#ifdef HACK
1006-
// Note: currently block ptr loads are always considered not expensive and
1007-
// therefore rematerializable.
1008-
Value base = op->getOperand(0);
1009-
auto parentLoop = op->getParentOfType<scf::ForOp>();
1010-
bool isInLoop = parentLoop != nullptr;
1011-
bool isTensorPtrLoad = mlir::triton::isTensorPointerType(base.getType());
1012-
1013-
if (!isTensorPtrLoad)
1014-
return !ttgi::isExpensiveLoadOrStore(op);
1015-
1016-
// HACK: consider block ptr loads expensive if they are in a loop.
1017-
return !isInLoop;
1018-
#else
1019-
return !ttgi::isExpensiveLoadOrStore(op);
1020-
#endif
1021-
}
1022-
1023-
if (isa<StoreOp>(op))
972+
if (isa<LoadOp, StoreOp>(op))
1024973
return !ttgi::isExpensiveLoadOrStore(op);
1025-
1026974
if (isa<AtomicRMWOp, AtomicCASOp, DotOp>(op))
1027975
return false;
1028976
if (isa<scf::WhileOp, scf::ConditionOp>(op))

third_party/intel/lib/TritonIntelGPUTransforms/Utility.cpp

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -86,34 +86,22 @@ bool isExpensiveLoadOrStore(Operation *op) {
8686
"Expecting Triton LoadOp or StoreOp");
8787
Value base = op->getOperand(0);
8888

89-
// Case 1: A size 1 tensor is not expensive since all threads will load the
90-
// same
89+
// A size 1 tensor is not expensive since all threads will load the same
90+
// value.
9191
if (isSingleValue(base))
9292
return false;
9393

94-
// Case 2: Tensor of pointers has more threads than elements
95-
// we can presume a high hit-rate that makes it cheap to load
96-
97-
// IDEA: Block pointers loads are expensive if:
98-
// - they cannot be lowered to 2D block reads (they feed a dot operation)
99-
// - temporarily we can look at the "triton_intel_gpu.block_io" attribute,
100-
// if it has it it can be lowered to 2D block reads
101-
//
102-
//
103-
104-
#define NEW 1
105-
#ifdef NEW
94+
// Loads that use a block pointer are expensive if they cannot be lowered to
95+
// 2D block read operations. Temporarily leverage the
96+
// "triton_intel_gpu.block_io" attribute to filter out inexpensive loads.
10697
Attribute blockIOAttr =
10798
op->getAttr(TritonIntelGPUDialect::getBlockIOAttrName());
108-
if (blockIOAttr) {
109-
llvm::errs() << "load op: " << *op << " is not expensive\n";
99+
if (blockIOAttr)
110100
return false;
111-
}
112101

102+
// Loads that use more threads than elements can be presumed to have a high
103+
// hit-rate that makes them cheap to load.
113104
if (auto ptrType = getRankedTensorType(base.getType())) {
114-
#else
115-
if (auto ptrType = dyn_cast<RankedTensorType>(base.getType())) {
116-
#endif
117105
auto mod = op->getParentOfType<ModuleOp>();
118106
int numWarps = ttg::TritonGPUDialect::getNumWarps(mod);
119107
int threadsPerWarp = ttg::TritonGPUDialect::getThreadsPerWarp(mod);

0 commit comments

Comments (0)