@@ -47,6 +47,7 @@ class KrnlMatmulLowering : public ConversionPattern {
4747 typeConverter, KrnlMatMulOp::getOperationName(), 1, context) {
4848 this ->parallelEnabled = parallelEnabled;
4949 }
50+
5051 bool parallelEnabled = false ;
5152
5253 LogicalResult matchAndRewrite (Operation *op, ArrayRef<Value> operands,
@@ -302,7 +303,7 @@ class KrnlMatmulLowering : public ConversionPattern {
302303 // Have to privatize CTmpType by unroll factor (1 if none).
303304 MemRefType CTmpType = MemRefType::get ({unrollFactor}, elementType);
304305 assert (BUFFER_ALIGN >= gDefaultAllocAlign );
305- //
306+
306307 if (parallelEnabled)
307308 return createMemRef.alignedAlloc (CTmpType, BUFFER_ALIGN);
308309 return createMemRef.alignedAlloca (CTmpType, BUFFER_ALIGN);
@@ -394,6 +395,7 @@ class KrnlMatmulLowering : public ConversionPattern {
394395 // the parallel loop, which is not great. TODO: migrate alloca from inside
395396 // the parallel loop to the OMP parallel region before the loop.
396397 // Grep for this pattern in all 3 instances of "parallelEnabled".
398+
397399 if (parallelEnabled)
398400 return create.mem .alignedAlloc (CTmpType, BUFFER_ALIGN);
399401 return create.mem .alignedAlloca (CTmpType, BUFFER_ALIGN);
@@ -498,6 +500,7 @@ class KrnlMatmulLowering : public ConversionPattern {
498500 // But at this time, if parallel is enabled, alloca would be stuck inside of
499501 // the parallel loop, which is not great. TODO: migrate alloca from inside
500502 // the parallel loop to the OMP parallel region before the loop.
503+
501504 if (parallelEnabled)
502505 return create.mem .alignedAlloc (CTmpType, BUFFER_ALIGN);
503506 return create.mem .alignedAlloca (CTmpType, BUFFER_ALIGN);
0 commit comments