Skip to content

Commit 860ccf7

Browse files
committed
new x86 avx instructions: vbcstnebf162ps, vcvtneebf162ps, vcvtneobf162ps
1 parent 0283bb3 commit 860ccf7

File tree

8 files changed

+347
-1
lines changed

8 files changed

+347
-1
lines changed

mlir/include/mlir/Dialect/X86Vector/X86Vector.td

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,4 +408,110 @@ def DotOp : AVX_LowOp<"dot", [Pure,
408408
}];
409409
}
410410

411+
412+
//----------------------------------------------------------------------------//
413+
// AVX: Convert packed BF16 even-indexed/odd-indexed elements into packed F32
414+
//----------------------------------------------------------------------------//
415+
416+
def CvtPackedEvenIndexedBF16ToF32Op : AVX_Op<"cvt.packed.even.indexed.bf16_to_f32", [Pure,
417+
DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> {
418+
let summary = "AVX: Convert packed BF16 even-indexed elements into packed F32 Data.";
419+
let description = [{
420+
#### From the Intel Intrinsics Guide:
421+
422+
Convert packed BF16 (16-bit) floating-point even-indexed elements stored at
423+
memory locations starting at location `__A` to packed single-precision
424+
(32-bit) floating-point elements, and store the results in `dst`.
425+
426+
Example:
427+
```mlir
428+
%dst = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<8xf32>
429+
```
430+
}];
431+
let arguments = (ins LLVM_AnyPointer:$a);
432+
let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst);
433+
let assemblyFormat =
434+
"$a attr-dict`:` type($a)`->` type($dst)";
435+
436+
let extraClassDefinition = [{
437+
std::string $cppClass::getIntrinsicName() {
438+
std::string intr = "llvm.x86.vcvtneebf162ps";
439+
VectorType vecType = getDst().getType();
440+
unsigned elemBitWidth = vecType.getElementTypeBitWidth();
441+
unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth;
442+
intr += std::to_string(opBitWidth);
443+
return intr;
444+
}
445+
}];
446+
}
447+
448+
def CvtPackedOddIndexedBF16ToF32Op : AVX_Op<"cvt.packed.odd.indexed.bf16_to_f32", [Pure,
449+
DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> {
450+
let summary = "AVX: Convert packed BF16 odd-indexed elements into packed F32 Data.";
451+
let description = [{
452+
#### From the Intel Intrinsics Guide:
453+
454+
Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at
455+
memory locations starting at location `__A` to packed single-precision
456+
(32-bit) floating-point elements, and store the results in `dst`.
457+
458+
Example:
459+
```mlir
460+
%dst = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<8xf32>
461+
```
462+
}];
463+
let arguments = (ins LLVM_AnyPointer:$a);
464+
let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst);
465+
let assemblyFormat =
466+
"$a attr-dict`:` type($a)`->` type($dst)";
467+
468+
let extraClassDefinition = [{
469+
std::string $cppClass::getIntrinsicName() {
470+
std::string intr = "llvm.x86.vcvtneobf162ps";
471+
VectorType vecType = getDst().getType();
472+
unsigned elemBitWidth = vecType.getElementTypeBitWidth();
473+
unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth;
474+
intr += std::to_string(opBitWidth);
475+
return intr;
476+
}
477+
}];
478+
}
479+
480+
//----------------------------------------------------------------------------//
481+
// AVX: Convert BF16 to F32 and broadcast into packed F32
482+
//----------------------------------------------------------------------------//
483+
484+
def BcstBF16ToPackedF32Op : AVX_Op<"bcst.bf16_to_f32.packed", [Pure,
485+
DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> {
486+
let summary = "AVX: Broadcasts BF16 into packed F32 Data.";
487+
let description = [{
488+
#### From the Intel Intrinsics Guide:
489+
490+
Convert scalar BF16 (16-bit) floating-point element stored at memory locations
491+
starting at location `__A` to a single-precision (32-bit) floating-point,
492+
broadcast it to packed single-precision (32-bit) floating-point elements,
493+
and store the results in `dst`.
494+
495+
Example:
496+
```mlir
497+
%dst = x86vector.avx.bcst.bf16_to_f32.packed %a : !llvm.ptr -> vector<8xf32>
498+
```
499+
}];
500+
let arguments = (ins LLVM_AnyPointer:$a);
501+
let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst);
502+
let assemblyFormat =
503+
"$a attr-dict`:` type($a)`->` type($dst)";
504+
505+
let extraClassDefinition = [{
506+
std::string $cppClass::getIntrinsicName() {
507+
std::string intr = "llvm.x86.vbcstnebf162ps";
508+
VectorType vecType = getDst().getType();
509+
unsigned elemBitWidth = vecType.getElementTypeBitWidth();
510+
unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth;
511+
intr += std::to_string(opBitWidth);
512+
return intr;
513+
}
514+
}];
515+
}
516+
411517
#endif // X86VECTOR_OPS

mlir/include/mlir/Dialect/X86Vector/X86VectorDialect.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "mlir/IR/PatternMatch.h"
2222
#include "mlir/Interfaces/InferTypeOpInterface.h"
2323
#include "mlir/Interfaces/SideEffectInterfaces.h"
24+
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
2425

2526
/// Include the generated interface declarations.
2627
#include "mlir/Dialect/X86Vector/X86VectorInterfaces.h.inc"

mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ void mlir::populateX86VectorLegalizeForLLVMExportPatterns(
115115
void mlir::configureX86VectorLegalizeForExportTarget(
116116
LLVMConversionTarget &target) {
117117
target.addIllegalOp<MaskCompressOp, MaskRndScaleOp, MaskScaleFOp,
118-
Vp2IntersectOp, DotBF16Op, CvtPackedF32ToBF16Op, RsqrtOp,
118+
Vp2IntersectOp, DotBF16Op, CvtPackedF32ToBF16Op, CvtPackedEvenIndexedBF16ToF32Op,
119+
CvtPackedOddIndexedBF16ToF32Op, BcstBF16ToPackedF32Op, RsqrtOp,
119120
DotOp>();
120121
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// REQUIRES: target=x86{{.*}}
2+
3+
// RUN: mlir-opt %s \
4+
// RUN: -convert-vector-to-llvm="enable-x86vector" -convert-to-llvm \
5+
// RUN: -reconcile-unrealized-casts | \
6+
// RUN: mlir-translate --mlir-to-llvmir | \
7+
// RUN: llc -mcpu=sierraforest | \
8+
// RUN: FileCheck %s
9+
10+
func.func @avxbf16_bcst_bf16_to_f32_packed_128(%arg0: !llvm.ptr) -> vector<4xf32> {
11+
%0 = x86vector.avx.bcst.bf16_to_f32.packed %arg0 : !llvm.ptr -> vector<4xf32>
12+
return %0 : vector<4xf32>
13+
}
14+
// CHECK-LABEL: avxbf16_bcst_bf16_to_f32_packed_128:
15+
// CHECK: vbcstnebf162ps{{.*}}%xmm
16+
17+
func.func @avxbf16_bcst_bf16_to_f32_packed_256(%arg0: !llvm.ptr) -> vector<8xf32> {
18+
%0 = x86vector.avx.bcst.bf16_to_f32.packed %arg0 : !llvm.ptr -> vector<8xf32>
19+
return %0 : vector<8xf32>
20+
}
21+
// CHECK-LABEL: avxbf16_bcst_bf16_to_f32_packed_256:
22+
// CHECK: vbcstnebf162ps{{.*}}%ymm
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// REQUIRES: target=x86{{.*}}
2+
3+
// RUN: mlir-opt %s \
4+
// RUN: -convert-vector-to-llvm="enable-x86vector" -convert-to-llvm \
5+
// RUN: -reconcile-unrealized-casts | \
6+
// RUN: mlir-translate --mlir-to-llvmir | \
7+
// RUN: llc -mcpu=sierraforest | \
8+
// RUN: FileCheck %s
9+
10+
func.func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_128(%arg0: memref<8xbf16>) -> vector<4xf32> {
11+
%intptr = memref.extract_aligned_pointer_as_index %arg0 : memref<8xbf16> -> index // aligned base address of the bf16 buffer
12+
%0 = arith.index_cast %intptr : index to i64 // i64, not i32: a narrower cast would drop the upper bits of the x86-64 address
13+
%1 = llvm.inttoptr %0 : i64 to !llvm.ptr // rebuild an opaque pointer for the intrinsic operand
14+
%2 = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %1 : !llvm.ptr -> vector<4xf32>
15+
return %2 : vector<4xf32>
16+
}
17+
// CHECK-LABEL: avxbf16_cvt_packed_even_indexed_bf16_to_f32_128:
18+
// CHECK: vcvtneebf162ps{{.*}}%xmm
19+
20+
func.func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_256(%arg0: memref<16xbf16>) -> vector<8xf32> {
21+
%intptr = memref.extract_aligned_pointer_as_index %arg0 : memref<16xbf16> -> index // aligned base address of the bf16 buffer
22+
%0 = arith.index_cast %intptr : index to i64 // i64, not i32: a narrower cast would drop the upper bits of the x86-64 address
23+
%1 = llvm.inttoptr %0 : i64 to !llvm.ptr // rebuild an opaque pointer for the intrinsic operand
24+
%2 = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %1 : !llvm.ptr -> vector<8xf32>
25+
return %2 : vector<8xf32>
26+
}
27+
// CHECK-LABEL: avxbf16_cvt_packed_even_indexed_bf16_to_f32_256:
28+
// CHECK: vcvtneebf162ps{{.*}}%ymm
29+
30+
func.func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_128(%arg0: memref<8xbf16>) -> vector<4xf32> {
31+
%intptr = memref.extract_aligned_pointer_as_index %arg0 : memref<8xbf16> -> index // aligned base address of the bf16 buffer
32+
%0 = arith.index_cast %intptr : index to i64 // i64, not i32: a narrower cast would drop the upper bits of the x86-64 address
33+
%1 = llvm.inttoptr %0 : i64 to !llvm.ptr // rebuild an opaque pointer for the intrinsic operand
34+
%2 = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %1 : !llvm.ptr -> vector<4xf32>
35+
return %2 : vector<4xf32>
36+
}
37+
// CHECK-LABEL: avxbf16_cvt_packed_odd_indexed_bf16_to_f32_128:
38+
// CHECK: vcvtneobf162ps{{.*}}%xmm
39+
40+
func.func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_256(%arg0: memref<16xbf16>) -> vector<8xf32> {
41+
%intptr = memref.extract_aligned_pointer_as_index %arg0 : memref<16xbf16> -> index // aligned base address of the bf16 buffer
42+
%0 = arith.index_cast %intptr : index to i64 // i64, not i32: a narrower cast would drop the upper bits of the x86-64 address
43+
%1 = llvm.inttoptr %0 : i64 to !llvm.ptr // rebuild an opaque pointer for the intrinsic operand
44+
%2 = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %1 : !llvm.ptr -> vector<8xf32>
45+
return %2 : vector<8xf32>
46+
}
47+
// CHECK-LABEL: avxbf16_cvt_packed_odd_indexed_bf16_to_f32_256:
48+
// CHECK: vcvtneobf162ps{{.*}}%ymm

mlir/test/Dialect/X86Vector/legalize-for-llvm.mlir

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,60 @@ func.func @avx512bf16_cvt_packed_f32_to_bf16_512(
9595
return %0 : vector<16xbf16>
9696
}
9797

98+
// CHECK-LABEL: func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_128
99+
func.func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_128(
100+
%a: !llvm.ptr) -> vector<4xf32>
101+
{
102+
// CHECK: llvm.call_intrinsic "llvm.x86.vcvtneebf162ps128"
103+
%0 = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<4xf32>
104+
return %0 : vector<4xf32>
105+
}
106+
107+
// CHECK-LABEL: func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_256
108+
func.func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_256(
109+
%a: !llvm.ptr) -> vector<8xf32>
110+
{
111+
// CHECK: llvm.call_intrinsic "llvm.x86.vcvtneebf162ps256"
112+
%0 = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<8xf32>
113+
return %0 : vector<8xf32>
114+
}
115+
116+
// CHECK-LABEL: func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_128
117+
func.func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_128(
118+
%a: !llvm.ptr) -> vector<4xf32>
119+
{
120+
// CHECK: llvm.call_intrinsic "llvm.x86.vcvtneobf162ps128"
121+
%0 = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<4xf32>
122+
return %0 : vector<4xf32>
123+
}
124+
125+
// CHECK-LABEL: func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_256
126+
func.func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_256(
127+
%a: !llvm.ptr) -> vector<8xf32>
128+
{
129+
// CHECK: llvm.call_intrinsic "llvm.x86.vcvtneobf162ps256"
130+
%0 = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<8xf32>
131+
return %0 : vector<8xf32>
132+
}
133+
134+
// CHECK-LABEL: func @avxbf16_bsct_bf16_to_f32_packed_128
135+
func.func @avxbf16_bsct_bf16_to_f32_packed_128(
136+
%a: !llvm.ptr) -> vector<4xf32>
137+
{
138+
// CHECK: llvm.call_intrinsic "llvm.x86.vbcstnebf162ps128"
139+
%0 = x86vector.avx.bcst.bf16_to_f32.packed %a : !llvm.ptr -> vector<4xf32>
140+
return %0 : vector<4xf32>
141+
}
142+
143+
// CHECK-LABEL: func @avxbf16_bsct_bf16_to_f32_packed_256
144+
func.func @avxbf16_bsct_bf16_to_f32_packed_256(
145+
%a: !llvm.ptr) -> vector<8xf32>
146+
{
147+
// CHECK: llvm.call_intrinsic "llvm.x86.vbcstnebf162ps256"
148+
%0 = x86vector.avx.bcst.bf16_to_f32.packed %a : !llvm.ptr -> vector<8xf32>
149+
return %0 : vector<8xf32>
150+
}
151+
98152
// CHECK-LABEL: func @avx_rsqrt
99153
func.func @avx_rsqrt(%a: vector<8xf32>) -> (vector<8xf32>)
100154
{

mlir/test/Dialect/X86Vector/roundtrip.mlir

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,66 @@ func.func @avx512bf16_cvt_packed_f32_to_bf16_512(
9494
return %0 : vector<16xbf16>
9595
}
9696

97+
// CHECK-LABEL: func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_128
98+
func.func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_128(
99+
%a: !llvm.ptr) -> vector<4xf32>
100+
{
101+
// CHECK: x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 {{.*}} :
102+
// CHECK-SAME: !llvm.ptr -> vector<4xf32>
103+
%0 = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<4xf32>
104+
return %0 : vector<4xf32>
105+
}
106+
107+
// CHECK-LABEL: func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_256
108+
func.func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_256(
109+
%a: !llvm.ptr) -> vector<8xf32>
110+
{
111+
// CHECK: x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 {{.*}} :
112+
// CHECK-SAME: !llvm.ptr -> vector<8xf32>
113+
%0 = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<8xf32>
114+
return %0 : vector<8xf32>
115+
}
116+
117+
// CHECK-LABEL: func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_128
118+
func.func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_128(
119+
%a: !llvm.ptr) -> vector<4xf32>
120+
{
121+
// CHECK: x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 {{.*}} :
122+
// CHECK-SAME: !llvm.ptr -> vector<4xf32>
123+
%0 = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<4xf32>
124+
return %0 : vector<4xf32>
125+
}
126+
127+
// CHECK-LABEL: func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_256
128+
func.func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_256(
129+
%a: !llvm.ptr) -> vector<8xf32>
130+
{
131+
// CHECK: x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 {{.*}} :
132+
// CHECK-SAME: !llvm.ptr -> vector<8xf32>
133+
%0 = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<8xf32>
134+
return %0 : vector<8xf32>
135+
}
136+
137+
// CHECK-LABEL: func @avxbf16_bcst_bf16_to_f32_128
138+
func.func @avxbf16_bcst_bf16_to_f32_128(
139+
%a: !llvm.ptr) -> vector<4xf32>
140+
{
141+
// CHECK: x86vector.avx.bcst.bf16_to_f32.packed {{.*}} :
142+
// CHECK-SAME: !llvm.ptr -> vector<4xf32>
143+
%0 = x86vector.avx.bcst.bf16_to_f32.packed %a : !llvm.ptr -> vector<4xf32>
144+
return %0 : vector<4xf32>
145+
}
146+
147+
// CHECK-LABEL: func @avxbf16_bcst_bf16_to_f32_256
148+
func.func @avxbf16_bcst_bf16_to_f32_256(
149+
%a: !llvm.ptr) -> vector<8xf32>
150+
{
151+
// CHECK: x86vector.avx.bcst.bf16_to_f32.packed {{.*}} :
152+
// CHECK-SAME: !llvm.ptr -> vector<8xf32>
153+
%0 = x86vector.avx.bcst.bf16_to_f32.packed %a : !llvm.ptr -> vector<8xf32>
154+
return %0 : vector<8xf32>
155+
}
156+
97157
// CHECK-LABEL: func @avx_rsqrt
98158
func.func @avx_rsqrt(%a: vector<8xf32>) -> (vector<8xf32>)
99159
{

mlir/test/Target/LLVMIR/x86vector.mlir

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,60 @@ func.func @LLVM_x86_avx512bf16_cvtneps2bf16_512(
109109
return %0 : vector<16xbf16>
110110
}
111111

112+
// CHECK-LABEL: define <4 x float> @LLVM_x86_avxbf16_vcvtneebf162ps128
113+
func.func @LLVM_x86_avxbf16_vcvtneebf162ps128(
114+
%a: !llvm.ptr) -> vector<4xf32>
115+
{
116+
// CHECK: call <4 x float> @llvm.x86.vcvtneebf162ps128(
117+
%0 = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<4xf32>
118+
return %0 : vector<4xf32>
119+
}
120+
121+
// CHECK-LABEL: define <8 x float> @LLVM_x86_avxbf16_vcvtneebf162ps256
122+
func.func @LLVM_x86_avxbf16_vcvtneebf162ps256(
123+
%a: !llvm.ptr) -> vector<8xf32>
124+
{
125+
// CHECK: call <8 x float> @llvm.x86.vcvtneebf162ps256(
126+
%0 = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<8xf32>
127+
return %0 : vector<8xf32>
128+
}
129+
130+
// CHECK-LABEL: define <4 x float> @LLVM_x86_avxbf16_vcvtneobf162ps128
131+
func.func @LLVM_x86_avxbf16_vcvtneobf162ps128(
132+
%a: !llvm.ptr) -> vector<4xf32>
133+
{
134+
// CHECK: call <4 x float> @llvm.x86.vcvtneobf162ps128(
135+
%0 = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<4xf32>
136+
return %0 : vector<4xf32>
137+
}
138+
139+
// CHECK-LABEL: define <8 x float> @LLVM_x86_avxbf16_vcvtneobf162ps256
140+
func.func @LLVM_x86_avxbf16_vcvtneobf162ps256(
141+
%a: !llvm.ptr) -> vector<8xf32>
142+
{
143+
// CHECK: call <8 x float> @llvm.x86.vcvtneobf162ps256(
144+
%0 = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : !llvm.ptr -> vector<8xf32>
145+
return %0 : vector<8xf32>
146+
}
147+
148+
// CHECK-LABEL: define <4 x float> @LLVM_x86_avxbf16_vbcstnebf162ps128
149+
func.func @LLVM_x86_avxbf16_vbcstnebf162ps128(
150+
%a: !llvm.ptr) -> vector<4xf32>
151+
{
152+
// CHECK: call <4 x float> @llvm.x86.vbcstnebf162ps128(
153+
%0 = x86vector.avx.bcst.bf16_to_f32.packed %a : !llvm.ptr -> vector<4xf32>
154+
return %0 : vector<4xf32>
155+
}
156+
157+
// CHECK-LABEL: define <8 x float> @LLVM_x86_avxbf16_vbcstnebf162ps256
158+
func.func @LLVM_x86_avxbf16_vbcstnebf162ps256(
159+
%a: !llvm.ptr) -> vector<8xf32>
160+
{
161+
// CHECK: call <8 x float> @llvm.x86.vbcstnebf162ps256(
162+
%0 = x86vector.avx.bcst.bf16_to_f32.packed %a : !llvm.ptr -> vector<8xf32>
163+
return %0 : vector<8xf32>
164+
}
165+
112166
// CHECK-LABEL: define <8 x float> @LLVM_x86_avx_rsqrt_ps_256
113167
func.func @LLVM_x86_avx_rsqrt_ps_256(%a: vector <8xf32>) -> vector<8xf32>
114168
{

0 commit comments

Comments
 (0)