@@ -146,6 +146,35 @@ class ROCDL_DimGetterFunctionOp<string mnemonic, string device_function,
146146 ];
147147}
148148
149+ //===----------------------------------------------------------------------===//
150+ // ROCDL vector type definitions
151+ //===----------------------------------------------------------------------===//
152+
153+ class ROCDL_ConcreteVector<Type elem, int length> :  // Vector type parameterized by one element type `elem` and one `length`.
154+ FixedVectorOfLengthAndType<[length], [elem]>,  // Passed single-entry lists, so presumably only this length/element pair is accepted — confirm against FixedVectorOfLengthAndType.
155+ BuildableType<  // The builder string pastes `length` and `elem.builderCall` into a VectorType::get(...) call.
156+ "::mlir::VectorType::get({" # length # "} ,"
157+ # elem.builderCall # ")">;
158+
159+ def ROCDL_V2I16Type : ROCDL_ConcreteVector<I16, 2>;  // Concrete instances in this list are named ROCDL_V<length><elem>Type.
160+ def ROCDL_V2F16Type : ROCDL_ConcreteVector<F16, 2>;
161+ def ROCDL_V2I32Type : ROCDL_ConcreteVector<I32, 2>;
162+ def ROCDL_V2BF16Type : ROCDL_ConcreteVector<BF16, 2>;
163+ def ROCDL_V2F32Type : ROCDL_ConcreteVector<F32, 2>;
164+ def ROCDL_V3I32Type : ROCDL_ConcreteVector<I32, 3>;
165+ def ROCDL_V4I32Type : ROCDL_ConcreteVector<I32, 4>;
166+ def ROCDL_V6I32Type : ROCDL_ConcreteVector<I32, 6>;
167+ def ROCDL_V8I32Type : ROCDL_ConcreteVector<I32, 8>;
168+ def ROCDL_V8BF16Type : ROCDL_ConcreteVector<BF16, 8>;
169+ def ROCDL_V8F16Type : ROCDL_ConcreteVector<F16, 8>;
170+ def ROCDL_V8F32Type : ROCDL_ConcreteVector<F32, 8>;
171+ def ROCDL_V16BF16Type : ROCDL_ConcreteVector<BF16, 16>;
172+ def ROCDL_V16F16Type : ROCDL_ConcreteVector<F16, 16>;
173+ def ROCDL_V16F32Type : ROCDL_ConcreteVector<F32, 16>;
174+ def ROCDL_V32F16Type : ROCDL_ConcreteVector<F16, 32>;
175+ def ROCDL_V32BF16Type : ROCDL_ConcreteVector<BF16, 32>;
176+ def ROCDL_V32F32Type : ROCDL_ConcreteVector<F32, 32>;
177+
149178//===----------------------------------------------------------------------===//
150179// Wave-level primitives
151180//===----------------------------------------------------------------------===//
@@ -805,6 +834,65 @@ def ROCDL_RawBufferAtomicCmpSwap :
805834 }];
806835}
807836
837+ //===---------------------------------------------------------------------===//
838+ // Raw tensor load/store intrinsics: gfx12+
839+
840+ def ROCDL_TensorLoadToLds :  // Op for the `tensor.load.to.lds` mnemonic: four i32-vector operands plus an I32 `cpol` attribute.
841+ ROCDL_IntrOp<"tensor.load.to.lds", [], [], [], 0, 0, 0, 0, [4], ["cpol"]>,  // NOTE(review): `[4], ["cpol"]` presumably ties argument index 4 to the `cpol` attribute as an immediate — confirm against ROCDL_IntrOp.
842+ Arguments<(ins ROCDL_V4I32Type:$desc0,  // Operands use fixed vector types; the assembly format below spells out no types.
843+ ROCDL_V8I32Type:$desc1,
844+ ROCDL_V4I32Type:$desc2,
845+ ROCDL_V4I32Type:$desc3,
846+ I32Attr:$cpol)>{
847+ let description = [{
848+ Loads tensor data from Global to LDS. Available on gfx12+.
849+ }];
850+ let assemblyFormat = [{
851+ attr-dict operands `cachepolicy` $cpol
852+ }];
853+ }
854+
855+ def ROCDL_TensorLoadToLdsD2 :  // Op for the `tensor.load.to.lds.d2` mnemonic: two i32-vector operands plus an I32 `cpol` attribute.
856+ ROCDL_IntrOp<"tensor.load.to.lds.d2", [], [], [], 0, 0, 0, 0, [2], ["cpol"]>,  // NOTE(review): `[2], ["cpol"]` presumably ties argument index 2 to the `cpol` attribute as an immediate — confirm against ROCDL_IntrOp.
857+ Arguments<(ins ROCDL_V4I32Type:$desc0,  // Only $desc0 and $desc1 are taken here, unlike the four-operand variant.
858+ ROCDL_V8I32Type:$desc1,
859+ I32Attr:$cpol)>{
860+ let description = [{
861+ Loads 2D tensor data from Global to LDS. Available on gfx12+. TODO
862+ }];
863+ let assemblyFormat = [{
864+ attr-dict operands `cachepolicy` $cpol
865+ }];
866+ }
867+
868+ def ROCDL_TensorStoreFromLds :  // Op for the `tensor.store.from.lds` mnemonic: four i32-vector operands plus an I32 `cpol` attribute.
869+ ROCDL_IntrOp<"tensor.store.from.lds", [], [], [], 0, 0, 0, 0, [4], ["cpol"]>,  // NOTE(review): `[4], ["cpol"]` presumably ties argument index 4 to the `cpol` attribute as an immediate — confirm against ROCDL_IntrOp.
870+ Arguments<(ins ROCDL_V4I32Type:$desc0,  // Operands use fixed vector types; the assembly format below spells out no types.
871+ ROCDL_V8I32Type:$desc1,
872+ ROCDL_V4I32Type:$desc2,
873+ ROCDL_V4I32Type:$desc3,
874+ I32Attr:$cpol)>{
875+ let description = [{
876+ Stores tensor data from LDS to Global. Available on gfx12+.
877+ }];
878+ let assemblyFormat = [{
879+ attr-dict operands `cachepolicy` $cpol
880+ }];
881+ }
882+
883+ def ROCDL_TensorStoreFromLdsD2 :  // Op for the `tensor.store.from.lds.d2` mnemonic: two i32-vector operands plus an I32 `cpol` attribute.
884+ ROCDL_IntrOp<"tensor.store.from.lds.d2", [], [], [], 0, 0, 0, 0, [2], ["cpol"]>,  // NOTE(review): `[2], ["cpol"]` presumably ties argument index 2 to the `cpol` attribute as an immediate — confirm against ROCDL_IntrOp.
885+ Arguments<(ins ROCDL_V4I32Type:$desc0,  // Only $desc0 and $desc1 are taken here, unlike the four-operand variant.
886+ ROCDL_V8I32Type:$desc1,
887+ I32Attr:$cpol)>{
888+ let description = [{
889+ Stores 2D tensor data from LDS to Global. Available on gfx12+. TODO
890+ }];
891+ let assemblyFormat = [{
892+ attr-dict operands `cachepolicy` $cpol
893+ }];
894+ }
895+
808896//===---------------------------------------------------------------------===//
809897// MI-100 and MI-200 buffer atomic floating point add intrinsic
810898
@@ -932,30 +1020,6 @@ def ROCDL_Permlane32SwapOp : ROCDL_IntrOp<"permlane32.swap", [], [],
9321020 }];
9331021}
9341022
935- class ROCDL_ConcreteVector<Type elem, int length> :
936- FixedVectorOfLengthAndType<[length], [elem]>,
937- BuildableType<
938- "::mlir::VectorType::get({" # length # "} ,"
939- # elem.builderCall # ")">;
940-
941- def ROCDL_V2I16Type : ROCDL_ConcreteVector<I16, 2>;
942- def ROCDL_V2F16Type : ROCDL_ConcreteVector<F16, 2>;
943- def ROCDL_V2I32Type : ROCDL_ConcreteVector<I32, 2>;
944- def ROCDL_V2BF16Type : ROCDL_ConcreteVector<BF16, 2>;
945- def ROCDL_V2F32Type : ROCDL_ConcreteVector<F32, 2>;
946- def ROCDL_V3I32Type : ROCDL_ConcreteVector<I32, 3>;
947- def ROCDL_V6I32Type : ROCDL_ConcreteVector<I32, 6>;
948- def ROCDL_V8I32Type : ROCDL_ConcreteVector<I32, 8>;
949- def ROCDL_V8BF16Type : ROCDL_ConcreteVector<BF16, 8>;
950- def ROCDL_V8F16Type : ROCDL_ConcreteVector<F16, 8>;
951- def ROCDL_V8F32Type : ROCDL_ConcreteVector<F32, 8>;
952- def ROCDL_V16BF16Type : ROCDL_ConcreteVector<BF16, 16>;
953- def ROCDL_V16F16Type : ROCDL_ConcreteVector<F16, 16>;
954- def ROCDL_V16F32Type : ROCDL_ConcreteVector<F32, 16>;
955- def ROCDL_V32F16Type : ROCDL_ConcreteVector<F16, 32>;
956- def ROCDL_V32BF16Type : ROCDL_ConcreteVector<BF16, 32>;
957- def ROCDL_V32F32Type : ROCDL_ConcreteVector<F32, 32>;
958-
9591023//===---------------------------------------------------------------------===//
9601024// 16-bit float intrinsics
9611025//===---------------------------------------------------------------------===//
0 commit comments