We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 793a3ae commit d2e9b1b. Copy full SHA for d2e9b1b
third_party/intel/lib/TritonIntelGPUTransforms/OptimizeElementwiseParallelism.cpp
@@ -104,8 +104,12 @@ bool optimizationDoesNotWorsenRegisterPressure(
104
if (auto convertLayout = dyn_cast<ConvertLayoutOp>(owner))
105
return convertLayout.getResult().getType() == newType;
106
107
+ // Broadcasted in source.
108
+ if (isa<ExpandDimsOp>(owner))
109
+ return true;
110
+
111
// Allow for loop optimizations.
- if (auto yield = dyn_cast<scf::YieldOp>(owner))
112
+ if (isa<scf::YieldOp>(owner))
113
return true;
114
115
// Only allow candidates. Check only operation constraints. We do not have
0 commit comments