@@ -132,24 +132,24 @@ def MapNestedForallToThreads :
132132 TransformEachOpTrait,
133133 TransformOpInterface]> {
134134 let description = [{
135- Target the `gpu.launch op` and rewrite all `scf.forall` nested in it to
135+ Target the `gpu.launch op` and rewrite all `scf.forall` nested in it to
136136 distributed `gpu.thread_id` attribute.
137137
138138 The operation searches for `scf.forall` ops nested under `target` and maps
139- each such op to GPU threads.
140-
139+ each such op to GPU threads.
140+
141141 `scf.forall` induction variables are rewritten to `gpu.thread_id` according
142142 to the `mapping` attribute.
143143
144144 Different types of mappings attributes are supported:
145145 - the block_dims is a list of integers that specifies the number of
146146 threads in each dimension. This is a mandatory attribute that is used
147- to constrain the number of threads in each dimension. If an
147+ to constrain the number of threads in each dimension. If an
148148 `scf.forall` op is mapped to fewer threads, predication occurs.
149149 - the warp_dims is a list of integers that specifies the number of
150150 warps in each dimension. This is an optional attribute that is used
151151 to constrain the number of warps in each dimension. When present, this
152- attribute must be specified in a way that is compatible with the
152+ attribute must be specified in a way that is compatible with the
153153 block_dims attribute. If an `scf.forall` op is mapped to fewer warps,
154154 predication occurs.
155155
@@ -164,7 +164,7 @@ def MapNestedForallToThreads :
164164 inserted after each scf.forall op. At this time, this is an all or nothing
165165 choice. This will need to be tightened in the future.
166166
167- The operation alters the block size of the given gpu_launch using the
167+ The operation alters the block size of the given gpu_launch using the
168168 mandatory block_dims argument.
169169
170170 #### Return modes:
@@ -268,7 +268,7 @@ def MapForallToBlocks :
268268 Only scf.forall distributed to **at most 3 dimensions** are
269269 currently supported.
270270
271- The operation alters the block size of the given gpu_launch using the
271+ The operation alters the block size of the given gpu_launch using the
272272 grid_dims argument.
273273
274274 #### Return modes:
@@ -300,7 +300,7 @@ def MapForallToBlocks :
300300 `:` functional-type($target, $result)
301301 }];
302302 let hasVerifier = 1;
303-
303+
304304 let extraClassDeclaration = [{
305305 ::mlir::DiagnosedSilenceableFailure applyToOne(
306306 ::mlir::transform::TransformRewriter &rewriter,
@@ -310,4 +310,15 @@ def MapForallToBlocks :
310310 }];
311311}
312312
313+ def ApplyGPUPromoteShuffleToAMDGPUPatternsOp : Op<Transform_Dialect,
314+ "apply_patterns.gpu.gpu_shuffle_to_amdgpu",
315+ [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
316+ let description = [{
317+ Collects patterns that try to promote `gpu.shuffle`s to specialized
318+ AMDGPU intrinsics.
319+ }];
320+ let assemblyFormat = "attr-dict";
321+ }
322+
323+
313324#endif // GPU_TRANSFORM_OPS
0 commit comments