//===----------------------------------------------------------------------===//
// Base definitions shared by TritonGPU attribute TableGen files.
// Splitting these out lets us emit certain attributes (e.g. CTAEncodingAttr)
// before interface headers without creating circular dependencies.
//===----------------------------------------------------------------------===//

#ifndef TRITONGPU_ATTRBASE_TD
#define TRITONGPU_ATTRBASE_TD

include "mlir/IR/AttrTypeBase.td"
include "triton/Dialect/Triton/IR/TritonInterfaces.td"
include "triton/Dialect/TritonGPU/IR/TritonGPUDialect.td"

// Op traits (NativeOpTrait) shared by several TritonGPU ops.
def MemDescViewTrait : NativeOpTrait<"MemDescViewTrait">;
def LocalLoadTrait : NativeOpTrait<"LocalLoadTrait">;
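
// Note: as NativeOpTraits these attach to op definitions, not attributes.
// A hypothetical op (op and base-class names assumed, not defined in this
// file) would list one of them in its trait list:
//   def TTG_SomeViewOp : TTG_Op<"some_view", [MemDescViewTrait]> { ... }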

// Common parameter helpers.
def LinearLayoutParam : AttrOrTypeParameter<"LinearLayout", "linear layout"> {
  let cppAccessorType = "const LinearLayout &";
}
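
// Illustrative only: an attribute definition (hypothetical) would use this
// helper in its parameter list, and the generated accessor then returns
// `const LinearLayout &` rather than a copy:
//   let parameters = (ins LinearLayoutParam:$linearLayout);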

// Base class for all TritonGPU attributes.
class TritonGPU_Attr<string name, string attrMnemonic, list<Trait> traits = []>
    : AttrDef<TritonGPU_Dialect, name, traits> {

  let description = [{
TritonGPU tensors differ from usual tensors in that they carry a _layout_ attribute which determines
how the data is partitioned across CUDA threads. Formally speaking, we define a layout as a function
$\mathcal{L}$ that maps a multi-dimensional tensor index $i \in \mathbb{Z}^d$ to a set of integers $T$
corresponding to the indices of the CUDA threads allowed to access the data at index $i$.

For example, consider the layout function:
$\mathcal{L}(0, 0) = \{0, 4\}$
$\mathcal{L}(0, 1) = \{1, 5\}$
$\mathcal{L}(1, 0) = \{2, 6\}$
$\mathcal{L}(1, 1) = \{3, 7\}$

Then, attaching $\mathcal{L}$ to a tensor $T$ would mean that:
- T[0,0] is owned by both CUDA threads 0 and 4
- T[0,1] is owned by both CUDA threads 1 and 5
- T[1,0] is owned by both CUDA threads 2 and 6
- T[1,1] is owned by both CUDA threads 3 and 7

Right now, Triton implements two main classes of layouts: shared and distributed.
  }];
  let attrName = "triton.gpu." # attrMnemonic;

  code extraBaseClassDeclaration = [{
  }];
}
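
// A minimal sketch (hypothetical names, not part of this file) of a concrete
// encoding deriving from TritonGPU_Attr:
//
//   def TritonGPU_ExampleEncodingAttr
//       : TritonGPU_Attr<"ExampleEncoding", "example_encoding"> {
//     let mnemonic = "example_encoding";
//     let parameters = (ins LinearLayoutParam:$linearLayout);
//     let extraClassDeclaration = extraBaseClassDeclaration;
//   }
//
// With `attrName` above, such an attribute registers as
// "triton.gpu.example_encoding".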

#endif // TRITONGPU_ATTRBASE_TD