Skip to content

Commit fd523e5

Browse files
committed
Add new AVX F16 ops to the x86vector dialect to handle F16-to-F32 conversions
1 parent f6b6fb8 commit fd523e5

File tree

2 files changed

+139
-0
lines changed

2 files changed

+139
-0
lines changed

mlir/include/mlir/Dialect/X86Vector/X86Vector.td

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,4 +527,126 @@ def BcstBF16ToPackedF32Op : AVX_Op<"bcst.bf16_to_f32.packed", [MemoryEffects<[Me
527527

528528
}
529529

530+
//----------------------------------------------------------------------------//
531+
// AVX: Convert packed F16 even-indexed/odd-indexed elements into packed F32
532+
//----------------------------------------------------------------------------//
533+
534+
// Op that reads F16 data through the memref operand `$a` (declared with a
// MemRead effect) and yields a 4- or 8-element F32 vector. It implements
// OneToOneIntrinsicOpInterface: getIntrinsicName() is defined inline below,
// while getIntrinsicOperands() is only declared here and is defined out of
// line.
def CvtPackedEvenIndexedF16ToF32Op : AVX_Op<"cvt.packed.even.indexed.f16_to_f32", [MemoryEffects<[MemRead]>,
  DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> {
  let summary = "AVX: Convert packed F16 even-indexed elements into packed F32 Data.";
  let description = [{

    #### From the Intel Intrinsics Guide:

    Convert packed F16 (16-bit) floating-point even-indexed elements stored at
    memory locations starting at location `__A` to packed single-precision
    (32-bit) floating-point elements, and store the results in `dst`.

    The source location is passed as the memref operand `$a`.

    Example:
    ```mlir
    %dst = x86vector.avx.cvt.packed.even.indexed.f16_to_f32 %a : memref<16xf16> -> vector<8xf32>
    ```
  }];
  let arguments = (ins AnyMemRef:$a);
  let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst);
  let assemblyFormat =
    "$a attr-dict`:` type($a)`->` type($dst)";

  let extraClassDefinition = [{
    std::string $cppClass::getIntrinsicName() {
      // The intrinsic name is the fixed prefix followed by the result width in
      // bits: leading-dimension length times element bit width (8 x 32 = 256
      // for vector<8xf32>).
      // NOTE(review): only the leading dimension is consulted; this presumably
      // relies on the result always being a rank-1 vector - confirm.
      VectorType resultType = getDst().getType();
      unsigned resultBits =
          resultType.getShape()[0] * resultType.getElementTypeBitWidth();
      return "llvm.x86.vcvtneeph2ps" + std::to_string(resultBits);
    }
  }];

  let extraClassDeclaration = [{
    SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&);
  }];
}
570+
571+
// Op that reads F16 data through the memref operand `$a` (declared with a
// MemRead effect) and yields a 4- or 8-element F32 vector. It implements
// OneToOneIntrinsicOpInterface: getIntrinsicName() is defined inline below,
// while getIntrinsicOperands() is only declared here and is defined out of
// line.
def CvtPackedOddIndexedF16ToF32Op : AVX_Op<"cvt.packed.odd.indexed.f16_to_f32", [MemoryEffects<[MemRead]>,
  DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> {
  let summary = "AVX: Convert packed F16 odd-indexed elements into packed F32 Data.";
  let description = [{

    #### From the Intel Intrinsics Guide:

    Convert packed F16 (16-bit) floating-point odd-indexed elements stored at
    memory locations starting at location `__A` to packed single-precision
    (32-bit) floating-point elements, and store the results in `dst`.

    The source location is passed as the memref operand `$a`.

    Example:
    ```mlir
    %dst = x86vector.avx.cvt.packed.odd.indexed.f16_to_f32 %a : memref<16xf16> -> vector<8xf32>
    ```
  }];
  let arguments = (ins AnyMemRef:$a);
  let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst);
  let assemblyFormat =
    "$a attr-dict`:` type($a)`->` type($dst)";

  let extraClassDefinition = [{
    std::string $cppClass::getIntrinsicName() {
      // The intrinsic name is the fixed prefix followed by the result width in
      // bits: leading-dimension length times element bit width (8 x 32 = 256
      // for vector<8xf32>).
      // NOTE(review): only the leading dimension is consulted; this presumably
      // relies on the result always being a rank-1 vector - confirm.
      VectorType resultType = getDst().getType();
      unsigned resultBits =
          resultType.getShape()[0] * resultType.getElementTypeBitWidth();
      return "llvm.x86.vcvtneoph2ps" + std::to_string(resultBits);
    }
  }];

  let extraClassDeclaration = [{
    SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&);
  }];
}
607+
608+
//----------------------------------------------------------------------------//
609+
// AVX: Convert F16 to F32 and broadcast into packed F32
610+
//----------------------------------------------------------------------------//
611+
612+
// Op that reads an F16 value through the memref operand `$a` (declared with a
// MemRead effect) and yields a 4- or 8-element F32 vector. It implements
// OneToOneIntrinsicOpInterface: getIntrinsicName() is defined inline below,
// while getIntrinsicOperands() is only declared here and is defined out of
// line.
def BcstF16ToPackedF32Op : AVX_Op<"bcst.f16_to_f32.packed", [MemoryEffects<[MemRead]>,
  DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> {
  let summary = "AVX: Broadcasts F16 into packed F32 Data.";

  let description = [{

    #### From the Intel Intrinsics Guide:

    Convert scalar F16 (16-bit) floating-point element stored at memory locations
    starting at location `__A` to a single-precision (32-bit) floating-point,
    broadcast it to packed single-precision (32-bit) floating-point elements,
    and store the results in `dst`.

    The source location is passed as the memref operand `$a`.

    Example:
    ```mlir
    %dst = x86vector.avx.bcst.f16_to_f32.packed %a : memref<1xf16> -> vector<8xf32>
    ```
  }];
  let arguments = (ins AnyMemRef:$a);
  let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst);
  let assemblyFormat =
    "$a attr-dict`:` type($a)`->` type($dst)";

  let extraClassDefinition = [{
    std::string $cppClass::getIntrinsicName() {
      // The intrinsic name is the fixed prefix followed by the result width in
      // bits: leading-dimension length times element bit width (8 x 32 = 256
      // for vector<8xf32>).
      // NOTE(review): only the leading dimension is consulted; this presumably
      // relies on the result always being a rank-1 vector - confirm.
      VectorType resultType = getDst().getType();
      unsigned resultBits =
          resultType.getShape()[0] * resultType.getElementTypeBitWidth();
      return "llvm.x86.vbcstnesh2ps" + std::to_string(resultBits);
    }
  }];

  let extraClassDeclaration = [{
    SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&);
  }];

}
651+
530652
#endif // X86VECTOR_OPS

mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,5 +112,22 @@ x86vector::CvtPackedEvenIndexedBF16ToF32Op::getIntrinsicOperands(
112112
return getMemrefBuffPtr(getLoc(), getA(), rewriter, typeConverter);
113113
}
114114

115+
/// Builds the operand list for this op's intrinsic by handing the op's
/// location and its memref operand `a` to getMemrefBuffPtr, whose result is
/// returned unchanged.
SmallVector<Value>
x86vector::CvtPackedEvenIndexedF16ToF32Op::getIntrinsicOperands(
    RewriterBase &rewriter, const LLVMTypeConverter &typeConverter) {
  auto source = getA();
  auto loc = getLoc();
  return getMemrefBuffPtr(loc, source, rewriter, typeConverter);
}
120+
121+
/// Builds the operand list for this op's intrinsic by handing the op's
/// location and its memref operand `a` to getMemrefBuffPtr, whose result is
/// returned unchanged.
SmallVector<Value>
x86vector::CvtPackedOddIndexedF16ToF32Op::getIntrinsicOperands(
    RewriterBase &rewriter, const LLVMTypeConverter &typeConverter) {
  auto source = getA();
  auto loc = getLoc();
  return getMemrefBuffPtr(loc, source, rewriter, typeConverter);
}
126+
127+
/// Builds the operand list for this op's intrinsic by handing the op's
/// location and its memref operand `a` to getMemrefBuffPtr, whose result is
/// returned unchanged.
SmallVector<Value> x86vector::BcstF16ToPackedF32Op::getIntrinsicOperands(
    RewriterBase &rewriter, const LLVMTypeConverter &typeConverter) {
  auto source = getA();
  auto loc = getLoc();
  return getMemrefBuffPtr(loc, source, rewriter, typeConverter);
}
131+
115132
#define GET_OP_CLASSES
116133
#include "mlir/Dialect/X86Vector/X86Vector.cpp.inc"

0 commit comments

Comments
 (0)