@@ -134,14 +134,21 @@ struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
134134
135135struct GPUSubgroupSizeOpToROCDL : ConvertOpToLLVMPattern<gpu::SubgroupSizeOp> {
136136 using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;
137+
138+ GPUSubgroupSizeOpToROCDL (const LLVMTypeConverter &converter,
139+ amdgpu::Chipset chipset)
140+ : ConvertOpToLLVMPattern<gpu::SubgroupSizeOp>(converter),
141+ chipset (chipset) {}
142+
137143 LogicalResult
138144 matchAndRewrite (gpu::SubgroupSizeOp op, gpu::SubgroupSizeOp::Adaptor adaptor,
139145 ConversionPatternRewriter &rewriter) const override {
140146 LLVM::ConstantRangeAttr bounds = nullptr ;
147+ bool isBeforeGfx10 = chipset.majorVersion < 10 ;
141148 if (auto upperBoundAttr = op.getUpperBoundAttr ()) {
142149 bounds = rewriter.getAttr <LLVM::ConstantRangeAttr>(
143- /* bitWidth=*/ 32 , /* lower=*/ 32 ,
144- /* upper=*/ op.getUpperBoundAttr ().getInt ());
150+ /* bitWidth=*/ 32 , /* lower=*/ isBeforeGfx10 ? 64 : 32 ,
151+ /* upper=*/ op.getUpperBoundAttr ().getInt () + 1 );
145152 }
146153 Value wavefrontOp = rewriter.create <ROCDL::WavefrontSizeOp>(
147154 op.getLoc (), rewriter.getI32Type (), bounds);
@@ -150,6 +157,8 @@ struct GPUSubgroupSizeOpToROCDL : ConvertOpToLLVMPattern<gpu::SubgroupSizeOp> {
150157 rewriter.replaceOp (op, {wavefrontOp});
151158 return success ();
152159 }
160+
161+ const amdgpu::Chipset chipset;
153162};
154163
155164struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern <gpu::ShuffleOp> {
@@ -358,7 +367,8 @@ struct LowerGpuOpsToROCDLOpsPass final
358367
359368 populateAMDGPUToROCDLConversionPatterns (converter, llvmPatterns,
360369 *maybeChipset);
361- populateGpuToROCDLConversionPatterns (converter, llvmPatterns, runtime);
370+ populateGpuToROCDLConversionPatterns (converter, llvmPatterns, runtime,
371+ *maybeChipset);
362372 configureGpuToROCDLConversionLegality (target);
363373 if (failed (applyPartialConversion (m, target, std::move (llvmPatterns))))
364374 signalPassFailure ();
@@ -406,7 +416,7 @@ void mlir::configureGpuToROCDLConversionLegality(ConversionTarget &target) {
406416
407417void mlir::populateGpuToROCDLConversionPatterns (
408418 const LLVMTypeConverter &converter, RewritePatternSet &patterns,
409- mlir::gpu::amd::Runtime runtime) {
419+ mlir::gpu::amd::Runtime runtime, amdgpu::Chipset chipset ) {
410420 using gpu::index_lowering::IndexKind;
411421 using gpu::index_lowering::IntrType;
412422 using mlir::gpu::amd::Runtime;
@@ -447,6 +457,7 @@ void mlir::populateGpuToROCDLConversionPatterns(
447457 patterns
448458 .add <GPUShuffleOpLowering, GPULaneIdOpToROCDL, GPUSubgroupSizeOpToROCDL>(
449459 converter);
460+ patterns.add <GPUSubgroupSizeOpToROCDL>(converter, chipset);
450461
451462 populateMathToROCDLConversionPatterns (converter, patterns);
452463}
0 commit comments