We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9bc9d7f commit 47fac97 (Copy full SHA for 47fac97)
third_party/intel/lib/TritonIntelGPUTransforms/OptimizeElementwiseParallelism.cpp
@@ -104,8 +104,12 @@ bool optimizationDoesNotWorsenRegisterPressure(
104
if (auto convertLayout = dyn_cast<ConvertLayoutOp>(owner))
105
return convertLayout.getResult().getType() == newType;
106
107
+ // Broadcasted in source.
108
+ if (isa<ExpandDimsOp>(owner))
109
+ return true;
110
+
111
// Allow for loop optimizations.
- if (auto yield = dyn_cast<scf::YieldOp>(owner))
112
+ if (isa<scf::YieldOp>(owner))
113
return true;
114
115
// Only allow candidates. Check only operation constraints. We do not have
0 commit comments