@@ -83,7 +83,7 @@ def MaskCompressOp : AVX512_Op<"mask.compress", [Pure,
83
83
}
84
84
}];
85
85
let extraClassDeclaration = [{
86
- SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&);
86
+ SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter& );
87
87
}];
88
88
}
89
89
@@ -404,8 +404,127 @@ def DotOp : AVX_LowOp<"dot", [Pure,
404
404
}
405
405
}];
406
406
let extraClassDeclaration = [{
407
- SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&);
407
+ SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter& );
408
408
}];
409
409
}
410
410
411

//----------------------------------------------------------------------------//
// AVX: Convert packed BF16 even-indexed/odd-indexed elements into packed F32
//----------------------------------------------------------------------------//

def CvtPackedEvenIndexedBF16ToF32Op : AVX_Op<"cvt.packed.even.indexed.bf16_to_f32", [MemoryEffects<[MemRead]>,
    DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> {
  let summary = "AVX: Convert packed BF16 even-indexed elements into packed F32 Data.";
  let description = [{
    #### From the Intel Intrinsics Guide:

    Convert packed BF16 (16-bit) floating-point even-indexed elements stored at
    memory locations starting at location `__A` to packed single-precision
    (32-bit) floating-point elements, and store the results in `dst`.

    Example:
    ```mlir
    %dst = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : memref<16xbf16> -> vector<8xf32>
    ```
  }];
  // The source buffer must hold BF16 elements: the underlying intrinsic
  // reinterprets raw 16-bit BF16 data, so `AnyMemRef` (which would also
  // admit e.g. f32 buffers) is too permissive.
  let arguments = (ins MemRefOf<[BF16]>:$a);
  let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst);
  let assemblyFormat =
    "$a attr-dict`:` type($a)`->` type($dst)";

  let extraClassDefinition = [{
    std::string $cppClass::getIntrinsicName() {
      // The LLVM intrinsic name is suffixed with the total result bit width:
      // 128 for vector<4xf32>, 256 for vector<8xf32>.
      std::string intr = "llvm.x86.vcvtneebf162ps";
      VectorType vecType = getDst().getType();
      unsigned elemBitWidth = vecType.getElementTypeBitWidth();
      unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth;
      intr += std::to_string(opBitWidth);
      return intr;
    }
  }];

  let extraClassDeclaration = [{
    SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&);
  }];
}
451

def CvtPackedOddIndexedBF16ToF32Op : AVX_Op<"cvt.packed.odd.indexed.bf16_to_f32", [MemoryEffects<[MemRead]>,
    DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> {
  let summary = "AVX: Convert packed BF16 odd-indexed elements into packed F32 Data.";
  let description = [{
    #### From the Intel Intrinsics Guide:

    Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at
    memory locations starting at location `__A` to packed single-precision
    (32-bit) floating-point elements, and store the results in `dst`.

    Example:
    ```mlir
    %dst = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : memref<16xbf16> -> vector<8xf32>
    ```
  }];
  // The source buffer must hold BF16 elements: the underlying intrinsic
  // reinterprets raw 16-bit BF16 data, so `AnyMemRef` (which would also
  // admit e.g. f32 buffers) is too permissive.
  let arguments = (ins MemRefOf<[BF16]>:$a);
  let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst);
  let assemblyFormat =
    "$a attr-dict`:` type($a)`->` type($dst)";

  let extraClassDefinition = [{
    std::string $cppClass::getIntrinsicName() {
      // The LLVM intrinsic name is suffixed with the total result bit width:
      // 128 for vector<4xf32>, 256 for vector<8xf32>.
      std::string intr = "llvm.x86.vcvtneobf162ps";
      VectorType vecType = getDst().getType();
      unsigned elemBitWidth = vecType.getElementTypeBitWidth();
      unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth;
      intr += std::to_string(opBitWidth);
      return intr;
    }
  }];

  let extraClassDeclaration = [{
    SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&);
  }];
}
487

//----------------------------------------------------------------------------//
// AVX: Convert BF16 to F32 and broadcast into packed F32
//----------------------------------------------------------------------------//

def BcstBF16ToPackedF32Op : AVX_Op<"bcst.bf16_to_f32.packed", [MemoryEffects<[MemRead]>,
    DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> {
  let summary = "AVX: Broadcasts BF16 into packed F32 Data.";
  let description = [{
    #### From the Intel Intrinsics Guide:

    Convert scalar BF16 (16-bit) floating-point element stored at memory locations
    starting at location `__A` to a single-precision (32-bit) floating-point,
    broadcast it to packed single-precision (32-bit) floating-point elements,
    and store the results in `dst`.

    Example:
    ```mlir
    %dst = x86vector.avx.bcst.bf16_to_f32.packed %a : memref<1xbf16> -> vector<8xf32>
    ```
  }];
  // The source buffer must hold BF16 elements: the underlying intrinsic
  // reinterprets raw 16-bit BF16 data, so `AnyMemRef` (which would also
  // admit e.g. f32 buffers) is too permissive.
  let arguments = (ins MemRefOf<[BF16]>:$a);
  let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst);
  let assemblyFormat =
    "$a attr-dict`:` type($a)`->` type($dst)";

  let extraClassDefinition = [{
    std::string $cppClass::getIntrinsicName() {
      // The LLVM intrinsic name is suffixed with the total result bit width:
      // 128 for vector<4xf32>, 256 for vector<8xf32>.
      std::string intr = "llvm.x86.vbcstnebf162ps";
      VectorType vecType = getDst().getType();
      unsigned elemBitWidth = vecType.getElementTypeBitWidth();
      unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth;
      intr += std::to_string(opBitWidth);
      return intr;
    }
  }];

  let extraClassDeclaration = [{
    SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&);
  }];
}
529

#endif // X86VECTOR_OPS