@@ -408,4 +408,110 @@ def DotOp : AVX_LowOp<"dot", [Pure,
408408 }];
409409}
410410
411+
412+ //----------------------------------------------------------------------------//
413+ // AVX: Convert packed BF16 even-indexed/odd-indexed elements into packed F32
414+ //----------------------------------------------------------------------------//
415+
416+ def CvtPackedEvenIndexedBF16ToF32Op : AVX_Op<"cvt.packed.even.indexed.bf16_to_f32", [Pure,
417+ DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> {
418+ let summary = "AVX: Convert packed BF16 even-indexed elements into packed F32 Data.";
419+ let description = [{
420+ #### From the Intel Intrinsics Guide:
421+
422+ Convert packed BF16 (16-bit) floating-point even-indexed elements stored at
423+ memory locations starting at location `__A` to packed single-precision
424+ (32-bit) floating-point elements, and store the results in `dst`.
425+
426+ Example:
427+ ```mlir
428+ %dst = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<8xf32>
429+ ```
430+ }];
431+ let arguments = (ins LLVM_AnyPointer:$a);
432+ let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst);
433+ let assemblyFormat =
434+ "$a attr-dict`:` type($a)`->` type($dst)";
435+
436+ let extraClassDefinition = [{
437+ std::string $cppClass::getIntrinsicName() {
438+ std::string intr = "llvm.x86.vcvtneebf162ps"; // base name; width suffix appended below
439+ VectorType vecType = getDst().getType();
440+ unsigned elemBitWidth = vecType.getElementTypeBitWidth();
441+ unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth; // leading dim x element width (128/256 for 1-D 4/8 x f32)
442+ intr += std::to_string(opBitWidth);
443+ return intr;
444+ }
445+ }];
446+ }
447+
448+ def CvtPackedOddIndexedBF16ToF32Op : AVX_Op<"cvt.packed.odd.indexed.bf16_to_f32", [Pure,
449+ DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> {
450+ let summary = "AVX: Convert packed BF16 odd-indexed elements into packed F32 Data.";
451+ let description = [{
452+ #### From the Intel Intrinsics Guide:
453+
454+ Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at
455+ memory locations starting at location `__A` to packed single-precision
456+ (32-bit) floating-point elements, and store the results in `dst`.
457+
458+ Example:
459+ ```mlir
460+ %dst = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<8xf32>
461+ ```
462+ }];
463+ let arguments = (ins LLVM_AnyPointer:$a);
464+ let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst);
465+ let assemblyFormat =
466+ "$a attr-dict`:` type($a)`->` type($dst)";
467+
468+ let extraClassDefinition = [{
469+ std::string $cppClass::getIntrinsicName() {
470+ std::string intr = "llvm.x86.vcvtneobf162ps"; // base name; width suffix appended below
471+ VectorType vecType = getDst().getType();
472+ unsigned elemBitWidth = vecType.getElementTypeBitWidth();
473+ unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth; // leading dim x element width (128/256 for 1-D 4/8 x f32)
474+ intr += std::to_string(opBitWidth);
475+ return intr;
476+ }
477+ }];
478+ }
479+
480+ //----------------------------------------------------------------------------//
481+ // AVX: Convert BF16 to F32 and broadcast into packed F32
482+ //----------------------------------------------------------------------------//
483+
484+ def BcstBF16ToPackedF32Op : AVX_Op<"bcst.bf16_to_f32.packed", [Pure,
485+ DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> {
486+ let summary = "AVX: Broadcasts BF16 into packed F32 Data.";
487+ let description = [{
488+ #### From the Intel Intrinsics Guide:
489+
490+ Convert scalar BF16 (16-bit) floating-point element stored at memory locations
491+ starting at location `__A` to a single-precision (32-bit) floating-point,
492+ broadcast it to packed single-precision (32-bit) floating-point elements,
493+ and store the results in `dst`.
494+
495+ Example:
496+ ```mlir
497+ %dst = x86vector.avx.bcst.bf16_to_f32.packed %a : !llvm.ptr -> vector<8xf32>
498+ ```
499+ }];
500+ let arguments = (ins LLVM_AnyPointer:$a);
501+ let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst);
502+ let assemblyFormat =
503+ "$a attr-dict`:` type($a)`->` type($dst)";
504+
505+ let extraClassDefinition = [{
506+ std::string $cppClass::getIntrinsicName() {
507+ std::string intr = "llvm.x86.vbcstnebf162ps"; // base name; width suffix appended below
508+ VectorType vecType = getDst().getType();
509+ unsigned elemBitWidth = vecType.getElementTypeBitWidth();
510+ unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth; // leading dim x element width (128/256 for 1-D 4/8 x f32)
511+ intr += std::to_string(opBitWidth);
512+ return intr;
513+ }
514+ }];
515+ }
516+
411517#endif // X86VECTOR_OPS
0 commit comments