We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 3b3c852, commit a4ab31d (Copy full SHA for a4ab31d)
lib/Dialect/TritonNvidiaGPU/Transforms/OptimizeTMemLayouts.cpp
@@ -363,8 +363,11 @@ class TMemToSharedMemPattern : public OpRewritePattern<TMEMLoadOp> {
363
SmallVector<std::pair<Value, Attribute>> uses;
364
uses.push_back({tmemLoadOp.getResult(), newEncoding});
365
bool foundImprovedStore = false;
366
+ llvm::DenseSet<std::pair<Value, Attribute>> visited;
367
while (!uses.empty()) {
368
auto [v, encoding] = uses.pop_back_val();
369
+ if (!visited.insert({v, encoding}).second)
370
+ continue;
371
for (auto user : v.getUsers()) {
372
if (auto localStore = dyn_cast<gpu::LocalStoreOp>(user)) {
373
// Check if the store benefits from the new layout.
0 commit comments