File tree Expand file tree Collapse file tree 1 file changed +15
-0
lines changed
third_party/intel/lib/TritonIntelGPUTransforms Expand file tree Collapse file tree 1 file changed +15
-0
lines changed Original file line number Diff line number Diff line change 1111#include " mlir/Transforms/DialectConversion.h"
1212
1313#include " intel/include/Dialect/TritonIntelGPU/IR/Attributes.h"
14+ #include " intel/include/Dialect/TritonIntelGPU/IR/Dialect.h"
1415#include " intel/include/Dialect/TritonIntelGPU/Transforms/Utility.h"
1516#include " triton/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.h"
1617#include " triton/Dialect/TritonGPU/IR/Dialect.h"
@@ -93,8 +94,22 @@ bool isExpensiveLoadOrStore(Operation *op) {
9394 // Case 2: Tensor of pointers has more threads than elements
9495 // we can presume a high hit-rate that makes it cheap to load
9596
97+ // IDEA: Block pointer loads are expensive if:
98+ // - they cannot be lowered to 2D block reads (they feed a dot operation)
99+ // - temporarily, we can look at the "triton_intel_gpu.block_io" attribute:
100+ // if the op has it, it can be lowered to 2D block reads
101+ //
102+ //
103+
96104#define NEW 1
97105#ifdef NEW
106+ Attribute blockIOAttr =
107+ op->getAttr (TritonIntelGPUDialect::getBlockIOAttrName ());
108+ if (blockIOAttr) {
109+ llvm::errs () << " load op: " << *op << " is not expensive\n " ;
110+ return false ;
111+ }
112+
98113 if (auto ptrType = getRankedTensorType (base.getType ())) {
99114#else
100115 if (auto ptrType = dyn_cast<RankedTensorType>(base.getType ())) {
You can’t perform that action at this time.
0 commit comments