@@ -83,7 +83,7 @@ def MaskCompressOp : AVX512_Op<"mask.compress", [Pure,
8383 }
8484 }];
8585 let extraClassDeclaration = [{
86- SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&);
86+ SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter& );
8787 }];
8888}
8989
@@ -404,8 +404,127 @@ def DotOp : AVX_LowOp<"dot", [Pure,
404404 }
405405 }];
406406 let extraClassDeclaration = [{
407- SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&);
407+ SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter& );
408408 }];
409409}
410410
411+
412+ //----------------------------------------------------------------------------//
413+ // AVX: Convert packed BF16 even-indexed/odd-indexed elements into packed F32
414+ //----------------------------------------------------------------------------//
415+
416+ def CvtPackedEvenIndexedBF16ToF32Op : AVX_Op<"cvt.packed.even.indexed.bf16_to_f32", [MemoryEffects<[MemRead]>, // op is declared with a memory-read effect
417+ DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> { // declares the OneToOneIntrinsicOpInterface methods on this op
418+ let summary = "AVX: Convert packed BF16 even-indexed elements into packed F32 Data.";
419+ let description = [{
420+ #### From the Intel Intrinsics Guide:
421+
422+ Convert packed BF16 (16-bit) floating-point even-indexed elements stored at
423+ memory locations starting at location `__A` to packed single-precision
424+ (32-bit) floating-point elements, and store the results in `dst`.
425+
426+ Example:
427+ ```mlir
428+ %dst = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : memref<16xbf16> -> vector<8xf32>
429+ ```
430+ }];
431+ let arguments = (ins AnyMemRef:$a); // single operand: any memref (element type is not constrained here)
432+ let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst); // result: vector of length 4 or 8 with f32 elements
433+ let assemblyFormat = // textual form: operand, attribute dict, then operand type -> result type
434+ "$a attr-dict`:` type($a)`->` type($dst)";
435+ // getIntrinsicName: base name "llvm.x86.vcvtneebf162ps" followed by the result vector's total bit width (lane count x element width, i.e. 128 or 256 for the f32 results allowed above).
436+ let extraClassDefinition = [{
437+ std::string $cppClass::getIntrinsicName() {
438+ std::string intr = "llvm.x86.vcvtneebf162ps";
439+ VectorType vecType = getDst().getType();
440+ unsigned elemBitWidth = vecType.getElementTypeBitWidth();
441+ unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth;
442+ intr += std::to_string(opBitWidth);
443+ return intr;
444+ }
445+ }];
446+ // getIntrinsicOperands is only declared here; its definition is not visible in this section (presumably it produces the operand list for the intrinsic call -- confirm in the .cpp).
447+ let extraClassDeclaration = [{
448+ SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&);
449+ }];
450+ }
451+
452+ def CvtPackedOddIndexedBF16ToF32Op : AVX_Op<"cvt.packed.odd.indexed.bf16_to_f32", [MemoryEffects<[MemRead]>, // op is declared with a memory-read effect
453+ DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> { // declares the OneToOneIntrinsicOpInterface methods on this op
454+ let summary = "AVX: Convert packed BF16 odd-indexed elements into packed F32 Data.";
455+ let description = [{
456+ #### From the Intel Intrinsics Guide:
457+
458+ Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at
459+ memory locations starting at location `__A` to packed single-precision
460+ (32-bit) floating-point elements, and store the results in `dst`.
461+
462+ Example:
463+ ```mlir
464+ %dst = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : memref<16xbf16> -> vector<8xf32>
465+ ```
466+ }];
467+ let arguments = (ins AnyMemRef:$a); // single operand: any memref (element type is not constrained here)
468+ let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst); // result: vector of length 4 or 8 with f32 elements
469+ let assemblyFormat = // textual form: operand, attribute dict, then operand type -> result type
470+ "$a attr-dict`:` type($a)`->` type($dst)";
471+ // getIntrinsicName: base name "llvm.x86.vcvtneobf162ps" followed by the result vector's total bit width (lane count x element width, i.e. 128 or 256 for the f32 results allowed above).
472+ let extraClassDefinition = [{
473+ std::string $cppClass::getIntrinsicName() {
474+ std::string intr = "llvm.x86.vcvtneobf162ps";
475+ VectorType vecType = getDst().getType();
476+ unsigned elemBitWidth = vecType.getElementTypeBitWidth();
477+ unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth;
478+ intr += std::to_string(opBitWidth);
479+ return intr;
480+ }
481+ }];
482+ // getIntrinsicOperands is only declared here; its definition is not visible in this section (presumably it produces the operand list for the intrinsic call -- confirm in the .cpp).
483+ let extraClassDeclaration = [{
484+ SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&);
485+ }];
486+ }
487+
488+ //----------------------------------------------------------------------------//
489+ // AVX: Convert BF16 to F32 and broadcast into packed F32
490+ //----------------------------------------------------------------------------//
491+
492+ def BcstBF16ToPackedF32Op : AVX_Op<"bcst.bf16_to_f32.packed", [MemoryEffects<[MemRead]>, // op is declared with a memory-read effect
493+ DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> { // declares the OneToOneIntrinsicOpInterface methods on this op
494+ let summary = "AVX: Broadcasts BF16 into packed F32 Data.";
495+ let description = [{
496+ #### From the Intel Intrinsics Guide:
497+
498+ Convert scalar BF16 (16-bit) floating-point element stored at memory locations
499+ starting at location `__A` to a single-precision (32-bit) floating-point,
500+ broadcast it to packed single-precision (32-bit) floating-point elements,
501+ and store the results in `dst`.
502+
503+ Example:
504+ ```mlir
505+ %dst = x86vector.avx.bcst.bf16_to_f32.packed %a : memref<1xbf16> -> vector<8xf32>
506+ ```
507+ }];
508+ let arguments = (ins AnyMemRef:$a); // single operand: any memref (element type and size are not constrained here)
509+ let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst); // result: vector of length 4 or 8 with f32 elements
510+ let assemblyFormat = // textual form: operand, attribute dict, then operand type -> result type
511+ "$a attr-dict`:` type($a)`->` type($dst)";
512+ // getIntrinsicName: base name "llvm.x86.vbcstnebf162ps" followed by the result vector's total bit width (lane count x element width, i.e. 128 or 256 for the f32 results allowed above).
513+ let extraClassDefinition = [{
514+ std::string $cppClass::getIntrinsicName() {
515+ std::string intr = "llvm.x86.vbcstnebf162ps";
516+ VectorType vecType = getDst().getType();
517+ unsigned elemBitWidth = vecType.getElementTypeBitWidth();
518+ unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth;
519+ intr += std::to_string(opBitWidth);
520+ return intr;
521+ }
522+ }];
523+ // getIntrinsicOperands is only declared here; its definition is not visible in this section (presumably it produces the operand list for the intrinsic call -- confirm in the .cpp).
524+ let extraClassDeclaration = [{
525+ SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&);
526+ }];
527+
528+ }
529+
411530#endif // X86VECTOR_OPS
0 commit comments