@@ -630,9 +630,9 @@ static bool checkForElementwiseUsersWithNewOperands(linalg::LinalgOp linalgOp) {
 static FailureOr<std::pair<LoweringConfigAttr, int64_t>>
 getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
     ArrayRef<int64_t> bounds, ArrayRef<AffineMap> maps,
-    ArrayRef<Value> operands, IREE::GPU::TargetAttr target, bool useDirectLoad,
-    bool isGemm, bool scaled, int64_t splitReductionTripCnt,
-    bool CPromoteIfPadding, bool hasExistingAccumulator = false,
+    ArrayRef<Value> operands, IREE::GPU::TargetAttr target, bool isGemm,
+    bool scaled, int64_t splitReductionTripCnt, bool CPromoteIfPadding,
+    bool hasExistingAccumulator = false,
     std::optional<ConvToIgemmInfo> convToIgemmInfo = std::nullopt) {
   if (target.getWgp().getMma().empty()) {
     return failure();
@@ -924,9 +924,9 @@ getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
   }
   // Do not use direct load DMA when padding is needed, as the source will
   // go through tensor.pad and won't be directly from global memory.
-  ArrayRef<Attribute> promotionTypes = (useDirectLoad && !couldNeedPadding)
-                                           ? ArrayRef<Attribute>(promotionArray)
-                                           : ArrayRef<Attribute>{};
+  ArrayRef<Attribute> promotionTypes =
+      couldNeedPadding ? ArrayRef<Attribute>{}
+                       : ArrayRef<Attribute>(promotionArray);
   GPU::appendPromotedOperandsList(context, attrs, promotionList,
                                   promotionTypes);
   if (!mustBeAligned || couldNeedPadding) {
@@ -966,9 +966,10 @@ getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
   return std::pair{loweringConfig, flatWorkgroupSize};
 }
 
-LogicalResult setIGEMMConvolutionLoweringConfig(
-    IREE::GPU::TargetAttr target, mlir::FunctionOpInterface entryPoint,
-    Operation *op, bool useDirectLoad, bool padConv) {
+LogicalResult
+setIGEMMConvolutionLoweringConfig(IREE::GPU::TargetAttr target,
+                                  mlir::FunctionOpInterface entryPoint,
+                                  Operation *op, bool padConv) {
   auto linalgOp = dyn_cast<linalg::LinalgOp>(op);
   if (!linalgOp || !linalg::isaConvolutionOpInterface(linalgOp)) {
     return failure();
@@ -1042,7 +1043,7 @@ LogicalResult setIGEMMConvolutionLoweringConfig(
   FailureOr<std::pair<LoweringConfigAttr, int64_t>> configAndWgSize =
       getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
           igemmLoopBounds, igemmContractionMaps, igemmOperands, target,
-          useDirectLoad, /*isGemm=*/false,
+          /*isGemm=*/false,
           /*scaled=*/false, splitReductionTripCnt,
           /*CPromoteIfPadding=*/CPromoteIfPadding, hasExistingAccumulator,
           convToIgemmInfo);
@@ -1055,7 +1056,7 @@ LogicalResult setIGEMMConvolutionLoweringConfig(
   SmallVector<NamedAttribute, 1> pipelineAttrs;
   auto pipelineOptions = IREE::GPU::GPUPipelineOptionsAttr::get(
       linalgOp->getContext(), /*prefetchNumStages=*/2,
-      /*no_reduce_shared_memory_bank_conflicts=*/useDirectLoad,
+      /*no_reduce_shared_memory_bank_conflicts=*/true,
       /*use_igemm_convolution=*/true,
       /*reorder_workgroups_strategy=*/std::nullopt);
   pipelineAttrs.emplace_back(
@@ -1073,7 +1074,7 @@ LogicalResult setIGEMMConvolutionLoweringConfig(
 
 LogicalResult setMatmulLoweringConfig(IREE::GPU::TargetAttr target,
                                       mlir::FunctionOpInterface entryPoint,
-                                      Operation *op, bool useDirectLoad) {
+                                      Operation *op) {
   auto linalgOp = dyn_cast<linalg::LinalgOp>(op);
   if (!linalgOp ||
       (!linalg::isaContractionOpInterface(linalgOp) &&
@@ -1100,18 +1101,15 @@ LogicalResult setMatmulLoweringConfig(IREE::GPU::TargetAttr target,
 
   FailureOr<std::pair<LoweringConfigAttr, int64_t>> configAndWgSize =
       getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
-          bounds, maps, operands, target, useDirectLoad, /*isGemm=*/true,
+          bounds, maps, operands, target, /*isGemm=*/true,
           /*scaled=*/false, splitReductionTripCnt, CPromoteIfPadding,
           hasExistingAccumulator);
 
   // TODO(muzasyed): add generalization for scaled and nonscaled versions of
   // matmul lowering.
   if (failed(configAndWgSize)) {
-    // TODO(muzasyed): Perform padding appropriately for minimizing bank
-    // conflicts when dealing with scaled matmuls. For now it is disabled.
-    useDirectLoad = true;
     configAndWgSize = getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
-        bounds, maps, operands, target, useDirectLoad, /*isGemm=*/true,
+        bounds, maps, operands, target, /*isGemm=*/true,
         /*scaled=*/true, splitReductionTripCnt, CPromoteIfPadding,
         hasExistingAccumulator);
   }
@@ -1125,7 +1123,7 @@ LogicalResult setMatmulLoweringConfig(IREE::GPU::TargetAttr target,
   SmallVector<NamedAttribute, 1> pipelineAttrs;
   auto pipelineOptions = IREE::GPU::GPUPipelineOptionsAttr::get(
       linalgOp->getContext(), /*prefetchNumStages=*/2,
-      /*no_reduce_shared_memory_bank_conflicts=*/useDirectLoad,
+      /*no_reduce_shared_memory_bank_conflicts=*/true,
       /*use_igemm_convolution=*/false,
       /*reorder_workgroups_strategy=*/std::nullopt);
   pipelineAttrs.emplace_back(
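
For context, a minimal call-site sketch under the new signatures; the wrapper function, `funcOp`, and `computeOp` are assumptions for illustration and are not part of this commit. Callers simply drop the `useDirectLoad` argument:

  // Hypothetical dispatch over the two updated entry points; every name
  // except the two set*LoweringConfig functions is illustrative only.
  LogicalResult setGemmLikeConfig(IREE::GPU::TargetAttr target,
                                  mlir::FunctionOpInterface funcOp,
                                  Operation *computeOp, bool padConv) {
    // Matmul path: `useDirectLoad` is gone from the signature.
    if (succeeded(setMatmulLoweringConfig(target, funcOp, computeOp))) {
      return success();
    }
    // IGEMM convolution path: only `padConv` remains as a tuning knob.
    return setIGEMMConvolutionLoweringConfig(target, funcOp, computeOp,
                                             padConv);
  }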