@@ -124,9 +124,10 @@ def GpuToGpuOcl : Pass<"gpu-to-gpuocl", "ModuleOp"> {
124124def GpuTilingAndFusion : Pass<"gpu-tiling", "func::FuncOp"> {
125125 let summary = "GPU tiling and fusion path.";
126126 let description = [{
127- This pass tiles linalg operations and creates an inner loop that is mapped to the block sizes, when converting
128- to gpu.launch. The tiles calculation is based on the GPU device properties, retrieved from the DLTI attributes.
129- If the DLTI attributes are not specified, defaults to the pass options.
127+ This pass tiles linalg operations and creates two nested scf.forall loops. When converting to gpu.launch,
128+ the inner loop is mapped to the block sizes and the outer loop to the grid sizes. The tile calculation is
129+ based on the GPU device properties, retrieved from the DLTI attributes. If the DLTI attributes are not
130+ specified, the calculation falls back to the pass options.
130131 }];
131132 let options = [
132133 Option<"numEus", "num-eus", "size_t",
@@ -143,18 +144,7 @@ def GpuTilingAndFusion : Pass<"gpu-tiling", "func::FuncOp"> {
143144 "Execution Unit cache size.">,
144145 Option<"vectorWidth", "vector-width", "size_t",
145146 /*default=*/"512",
146- "The maximum width of EU's vector registers.">
147- ];
148- }
149-
150- def GpuLoopTiling : Pass<"gpu-loop-tiling", "func::FuncOp"> {
151- let summary = "Create nested parallel loops to be mapped to GPU.";
152- let description = [{
153- This pass tiles the loops created by the GpuTilingAndFusion pass and converted to parallel loops. The tiles
154- calculation is based on the max_work_group_size DLTI attribute. If the attribute is not specified,
155- defaults to the pass options.
156- }];
157- let options = [
147+ "The maximum width of EU's vector registers.">,
158148 Option<"workGroupSize", "work-group-size", "size_t",
159149 /*default=*/"64",
160150 "The maximum workgroup size.">
0 commit comments