Skip to content

Commit f1a21fe

Browse files
committed
[WIP] using splat shifts
1 parent 07a1fbe commit f1a21fe

File tree

3 files changed

+298
-5
lines changed

3 files changed

+298
-5
lines changed

mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp

Lines changed: 157 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,14 @@ struct SourceElementRangeList : public SmallVector<SourceElementRange> {
546546
/// and `vector.bitcast ... : vector<2xi15> to vector<3xi10>` is decomposed as:
547547
/// [0] = {0, [0, 10)}, {1, [0, 5)}
548548
/// [1] = {1, [5, 10)}, {2, [0, 10)}
549+
/// and `vector.bitcast ... : vector<4xi4> to vector<2xi8>` is decomposed as:
550+
/// [0] = {0, [0, 4)}, {1, [0, 4)}
551+
/// [1] = {2, [0, 4)}, {3, [0, 4)}
552+
/// and `vector.bitcast ... : vector<2xi8> to vector<4xi4>` is decomposed as:
553+
/// [0] = {0, [0, 4)}
554+
/// [1] = {0, [4, 8)}
555+
/// [2] = {1, [0, 4)}
556+
/// [3] = {1, [4, 8)}
549557
struct BitCastBitsEnumerator {
550558
BitCastBitsEnumerator(VectorType sourceVectorType,
551559
VectorType targetVectorType);
@@ -633,6 +641,35 @@ struct BitCastBitsEnumerator {
633641
/// `(shuffle -> and -> shiftright -> shiftleft -> or)` to iteratively update
634642
/// the result vector (i.e. the `shiftright -> shiftleft -> or` part) with the
635643
/// bits extracted from the source vector (i.e. the `shuffle -> and` part).
644+
///
645+
///
646+
/// When we consider the above algorithm to rewrite our vector.bitcast, we rely
647+
/// on using dynamic shift amounts for the left and right shifts. This can be
648+
/// inefficient on certain targets (RDNA GPUs) in contrast to a splat constant
649+
/// value. So when possible we can rewrite this as a combination of shifts with
650+
/// a constant splat value and then regroup the selected terms.
651+
///
652+
/// Eg. Instead of:
653+
/// res = arith.shrui x [0, 4, 8, 0, 4, 8]
654+
/// use:
655+
/// y = arith.shrui x [0, 0, 0, 0, 0, 0] (can be folded away)
656+
/// y1 = arith.shrui x [4, 4, 4, 4, 4, 4]
657+
/// y2 = arith.shrui x [8, 8, 8, 8, 8, 8]
658+
/// y3 = vector.shuffle y y1 [0, 7, 3, 10]
659+
/// res = vector.shuffle y3 y2 [0, 1, 6, 2, 3, 9]
660+
///
661+
/// This is possible when the precomputed shift amounts follow a cyclic
662+
/// pattern of [x, y, z, ..., x, y, z, ...] such that the cycle length,
663+
/// cycleLen, satisfies 1 < cycleLen < size(shiftAmounts), and the shuffles are
664+
/// of the form [0, 0, 0, ..., 1, 1, 1, ...]. This is a common pattern in
665+
/// (de)quantization, i24 -> 3xi8 or 3xi8 -> i24. The modified algorithm follows
666+
/// the same 2 steps as above, then it proceeds as follows:
667+
///
668+
/// 2. for each element in the cycle, x, of the rightShiftAmounts create a
669+
/// shrui with a splat constant of x.
670+
/// 3. repeat 2. with the respective leftShiftAmounts
671+
/// 4. construct a chain of vector.shuffles that will reconstruct the result
672+
/// from the chained shifts
636673
struct BitCastRewriter {
637674
/// Helper metadata struct to hold the static quantities for the rewrite.
638675
struct Metadata {
@@ -656,10 +693,23 @@ struct BitCastRewriter {
656693
Value initialValue, Value runningResult,
657694
const BitCastRewriter::Metadata &metadata);
658695

696+
/// Rewrite one step of the sequence when able to use a splat constant for the
697+
/// shiftright and shiftleft.
698+
Value splatRewriteStep(PatternRewriter &rewriter, Location loc,
699+
Value initialValue, Value runningResult,
700+
const BitCastRewriter::Metadata &metadata);
701+
702+
bool useSplatStep() { return cycleLen > 1; }
703+
659704
private:
660705
/// Underlying enumerator that encodes the provenance of the bits in the each
661706
/// element of the result vector.
662707
BitCastBitsEnumerator enumerator;
708+
709+
// Underlying cycleLen computed during precomputeMetadata. A cycleLen > 1
710+
// denotes that there is a cycle in the precomputed shift amounts and we are
711+
// able to use the splatRewriteStep.
712+
int64_t cycleLen = 0;
663713
};
664714

665715
} // namespace
@@ -775,8 +825,40 @@ static LogicalResult alignedConversionPrecondition(PatternRewriter &rewriter,
775825
return success();
776826
}
777827

828+
// Check if the vector is a cycle of the first cycleLen elements.
829+
template <class T>
830+
static bool isCyclic(SmallVector<T> xs, int64_t cycleLen) {
831+
for (int64_t idx = cycleLen, n = xs.size(); idx < n; idx++) {
832+
if (xs[idx] != xs[idx % cycleLen])
833+
return false;
834+
}
835+
return true;
836+
}
837+
838+
/// Builds the mask for link `idx` of the shuffle chain emitted by the splat
/// rewrite. For each of the `numCycles` cycles the mask contains:
///   * `idx` consecutive positions taken from the first shuffle operand, then
///   * one position taken from the second operand, namely element
///     `cycle * cycleLen + idx`, offset by `inputSize` (the number of elements
///     in the first operand) so that it addresses the second operand.
///
/// The stride between cycles in the first operand depends on the link:
///   * idx == 1: the first operand still has the full, un-compacted size, so
///     consecutive cycles are `cycleLen` elements apart.
///   * idx > 1: the first operand is the output of the previous link and holds
///     `idx` elements per cycle, so consecutive cycles are `idx` apart.
static SmallVector<int64_t> constructShuffles(int64_t inputSize,
                                              int64_t numCycles,
                                              int64_t cycleLen, int64_t idx) {
  int64_t firstOperandStride = idx;
  if (idx == 1)
    firstOperandStride = cycleLen;

  SmallVector<int64_t> mask;
  // Running positions, advanced once per cycle instead of being recomputed
  // from the cycle number.
  int64_t firstOperandBase = 0;
  int64_t secondOperandPos = inputSize + idx;
  for (int64_t cycle = 0; cycle < numCycles; ++cycle) {
    // Keep what has been gathered so far for this cycle...
    for (int64_t offset = 0; offset < idx; ++offset)
      mask.push_back(firstOperandBase + offset);
    // ...and append this cycle's `idx`-th element from the second operand.
    mask.push_back(secondOperandPos);

    firstOperandBase += firstOperandStride;
    secondOperandPos += cycleLen;
  }
  return mask;
}
858+
778859
SmallVector<BitCastRewriter::Metadata>
779860
BitCastRewriter::precomputeMetadata(IntegerType shuffledElementType) {
861+
bool cyclicShifts = true;
780862
SmallVector<BitCastRewriter::Metadata> result;
781863
for (int64_t shuffleIdx = 0, e = enumerator.getMaxNumberOfEntries();
782864
shuffleIdx < e; ++shuffleIdx) {
@@ -811,8 +893,71 @@ BitCastRewriter::precomputeMetadata(IntegerType shuffledElementType) {
811893
IntegerAttr::get(shuffledElementType, shiftLeft));
812894
}
813895

896+
// Compute a potential cycle size by detecting the number of sourceElements
897+
// at the start of shuffle that are the same
898+
cycleLen = 1;
899+
for (int64_t n = shuffles.size(); cycleLen < n; cycleLen++)
900+
if (shuffles[cycleLen] != shuffles[0])
901+
break;
902+
903+
cyclicShifts = cyclicShifts && (cycleLen < (int64_t)shuffles.size()) &&
904+
isCyclic(shiftRightAmounts, cycleLen) &&
905+
isCyclic(shiftLeftAmounts, cycleLen);
906+
814907
result.push_back({shuffles, masks, shiftRightAmounts, shiftLeftAmounts});
815908
}
909+
910+
cycleLen = cyclicShifts ? cycleLen : 0;
911+
return result;
912+
}
913+
914+
/// Rewrites one step of the bitcast sequence using only splat-constant shift
/// amounts.
///
/// The source is shuffled and masked as described by `metadata`. Then, for
/// each position `idx` within the shift cycle, the whole masked vector is
/// shifted right and left by the splat of the `idx`-th shift amounts, and a
/// chain of vector.shuffle ops picks, per cycle, the element that needed
/// exactly that shift. If `runningResult` is non-null, the step result is OR'ed
/// into it so bits produced by earlier steps are preserved.
///
/// Precondition: `cycleLen` > 1 (see useSplatStep); it is used as a divisor.
Value BitCastRewriter::splatRewriteStep(
    PatternRewriter &rewriter, Location loc, Value initialValue,
    Value runningResult, const BitCastRewriter::Metadata &metadata) {

  // The first operand of the first chained shuffle is a fully sized shifted
  // vector, so the initial input size is the shuffle size.
  int64_t inputSize = metadata.shuffles.size();
  int64_t numCycles = inputSize / cycleLen;

  auto shuffleOp = rewriter.create<vector::ShuffleOp>(
      loc, initialValue, initialValue, metadata.shuffles);

  // Intersect with the mask.
  VectorType shuffledVectorType = shuffleOp.getResultVectorType();
  auto constOp = rewriter.create<arith::ConstantOp>(
      loc, DenseElementsAttr::get(shuffledVectorType, metadata.masks));
  Value andValue = rewriter.create<arith::AndIOp>(loc, shuffleOp, constOp);

  Value result;
  for (int64_t idx = 0; idx < cycleLen; idx++) {
    // Shift the whole masked vector by the idx-th amounts of the cycle, as
    // splat constants.
    auto shiftRightConstantOp = rewriter.create<arith::ConstantOp>(
        loc, SplatElementsAttr::get(shuffledVectorType,
                                    metadata.shiftRightAmounts[idx]));
    Value shiftedRight =
        rewriter.create<arith::ShRUIOp>(loc, andValue, shiftRightConstantOp);

    auto shiftLeftConstantOp = rewriter.create<arith::ConstantOp>(
        loc, SplatElementsAttr::get(shuffledVectorType,
                                    metadata.shiftLeftAmounts[idx]));
    Value shiftedLeft =
        rewriter.create<arith::ShLIOp>(loc, shiftedRight, shiftLeftConstantOp);

    if (result) {
      SmallVector<int64_t> shuffles =
          constructShuffles(inputSize, numCycles, cycleLen, idx);
      result = rewriter.create<vector::ShuffleOp>(loc, result, shiftedLeft,
                                                  shuffles);

      // After the first shuffle in the chain, the chained result holds
      // (idx + 1) elements per cycle; it grows by numCycles elements each
      // iteration until it reaches the shuffledVectorType size again.
      inputSize = numCycles * (idx + 1);
    } else {
      result = shiftedLeft;
    }
  }

  // Accumulate into the bits produced by previous steps. Previously
  // `runningResult` was ignored, which would drop those bits whenever more
  // than one step is needed.
  if (runningResult)
    result = rewriter.create<arith::OrIOp>(loc, runningResult, result);

  return result;
}
818963

@@ -961,8 +1106,12 @@ struct RewriteBitCastOfTruncI : OpRewritePattern<vector::BitCastOp> {
9611106
Value runningResult;
9621107
for (const BitCastRewriter ::Metadata &metadata :
9631108
bcr.precomputeMetadata(shuffledElementType)) {
964-
runningResult = bcr.genericRewriteStep(
965-
rewriter, bitCastOp->getLoc(), truncValue, runningResult, metadata);
1109+
runningResult =
1110+
bcr.useSplatStep()
1111+
? bcr.splatRewriteStep(rewriter, bitCastOp->getLoc(), truncValue,
1112+
runningResult, metadata)
1113+
: bcr.genericRewriteStep(rewriter, bitCastOp->getLoc(),
1114+
truncValue, runningResult, metadata);
9661115
}
9671116

9681117
// Finalize the rewrite.
@@ -1026,8 +1175,12 @@ struct RewriteExtOfBitCast : OpRewritePattern<ExtOpType> {
10261175
cast<IntegerType>(getElementTypeOrSelf(sourceValue.getType()));
10271176
for (const BitCastRewriter::Metadata &metadata :
10281177
bcr.precomputeMetadata(shuffledElementType)) {
1029-
runningResult = bcr.genericRewriteStep(
1030-
rewriter, bitCastOp->getLoc(), sourceValue, runningResult, metadata);
1178+
runningResult =
1179+
bcr.useSplatStep()
1180+
? bcr.splatRewriteStep(rewriter, bitCastOp->getLoc(), sourceValue,
1181+
runningResult, metadata)
1182+
: bcr.genericRewriteStep(rewriter, bitCastOp->getLoc(),
1183+
sourceValue, runningResult, metadata);
10311184
}
10321185

10331186
// Finalize the rewrite.

mlir/test/Dialect/Vector/vector-rewrite-narrow-types.mlir

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,42 @@ func.func @f4(%a: vector<16xi16>) -> vector<8xi6> {
146146
return %1 : vector<8xi6>
147147
}
148148

149+
// CHECK-LABEL: func.func @ftrunc_splat1(
150+
// CHECK-SAME: %[[A:[0-9a-z]*]]: vector<2xi16>) -> vector<1xi8> {
151+
func.func @ftrunc_splat1(%a: vector<2xi16>) -> vector<1xi8> {
152+
// CHECK-DAG: %[[MASK:.*]] = arith.constant dense<15> : vector<1xi16>
153+
// CHECK-DAG: %[[SHL_CST:.*]] = arith.constant dense<4> : vector<1xi16>
154+
// CHECK: %[[V0:.*]] = vector.shuffle %[[A]], %[[A]] [0] : vector<2xi16>, vector<2xi16>
155+
// CHECK: %[[A0:.*]] = arith.andi %[[V0]], %[[MASK]] : vector<1xi16>
156+
// CHECK: %[[V1:.*]] = vector.shuffle %[[A]], %[[A]] [1] : vector<2xi16>, vector<2xi16>
157+
// CHECK: %[[A1:.*]] = arith.andi %[[V1]], %[[MASK]] : vector<1xi16>
158+
// CHECK: %[[SHL0:.*]] = arith.shli %[[A1]], %[[SHL_CST]] : vector<1xi16>
159+
// CHECK: %[[O1:.*]] = arith.ori %[[A0]], %[[SHL0]] : vector<1xi16>
160+
// CHECK: %[[RES:.*]] = arith.trunci %[[O1]] : vector<1xi16> to vector<1xi8>
161+
// return %[[RES]] : vector<1xi8>
162+
%0 = arith.trunci %a : vector<2xi16> to vector<2xi4>
163+
%1 = vector.bitcast %0 : vector<2xi4> to vector<1xi8>
164+
return %1 : vector<1xi8>
165+
}
166+
167+
// CHECK-LABEL: func.func @ftrunc_splat2(
168+
// CHECK-SAME: %[[A:[0-9a-z]*]]: vector<4xi16>) -> vector<2xi8> {
169+
func.func @ftrunc_splat2(%a: vector<4xi16>) -> vector<2xi8> {
170+
// CHECK-DAG: %[[MASK:.*]] = arith.constant dense<15> : vector<2xi16>
171+
// CHECK-DAG: %[[SHL_CST:.*]] = arith.constant dense<4> : vector<2xi16>
172+
// CHECK: %[[V0:.*]] = vector.shuffle %[[A]], %[[A]] [0, 2] : vector<4xi16>, vector<4xi16>
173+
// CHECK: %[[A0:.*]] = arith.andi %[[V0]], %[[MASK]] : vector<2xi16>
174+
// CHECK: %[[V1:.*]] = vector.shuffle %[[A]], %[[A]] [1, 3] : vector<4xi16>, vector<4xi16>
175+
// CHECK: %[[A1:.*]] = arith.andi %[[V1]], %[[MASK]] : vector<2xi16>
176+
// CHECK: %[[SHL0:.*]] = arith.shli %[[A1]], %[[SHL_CST]] : vector<2xi16>
177+
// CHECK: %[[O1:.*]] = arith.ori %[[A0]], %[[SHL0]] : vector<2xi16>
178+
// CHECK: %[[RES:.*]] = arith.trunci %[[O1]] : vector<2xi16> to vector<2xi8>
179+
// return %[[RES]] : vector<2xi8>
180+
%0 = arith.trunci %a : vector<4xi16> to vector<4xi4>
181+
%1 = vector.bitcast %0 : vector<4xi4> to vector<2xi8>
182+
return %1 : vector<2xi8>
183+
}
184+
149185
// CHECK-LABEL: func.func @f1ext(
150186
// CHECK-SAME: %[[A:[0-9a-z]*]]: vector<5xi8>) -> vector<8xi16> {
151187
func.func @f1ext(%a: vector<5xi8>) -> vector<8xi16> {
@@ -193,6 +229,44 @@ func.func @f3ext(%a: vector<5xi8>) -> vector<8xi17> {
193229
return %1 : vector<8xi17>
194230
}
195231

232+
// CHECK-LABEL: func.func @fext_splat1(
233+
// CHECK-SAME: %[[A:[0-9a-z]*]]: vector<2xi8>) -> vector<4xi16> {
234+
func.func @fext_splat1(%a: vector<2xi8>) -> vector<4xi16> {
235+
// CHECK-DAG: %[[MASK:.*]] = arith.constant dense<[15, -16, 15, -16]> : vector<4xi8>
236+
// CHECK-DAG: %[[SHR_CST:.*]] = arith.constant dense<4> : vector<4xi8>
237+
// CHECK: %[[V0:.*]] = vector.shuffle %[[A]], %[[A]] [0, 0, 1, 1] : vector<2xi8>, vector<2xi8>
238+
// CHECK: %[[A0:.*]] = arith.andi %[[V0]], %[[MASK]] : vector<4xi8>
239+
// CHECK: %[[SHR0:.*]] = arith.shrui %[[A0]], %[[SHR_CST]] : vector<4xi8>
240+
// CHECK: %[[V1:.*]] = vector.shuffle %[[A0]], %[[SHR0]] [0, 5, 2, 7] : vector<4xi8>, vector<4xi8>
241+
// CHECK: %[[RES:.*]] = arith.extui %[[V1]] : vector<4xi8> to vector<4xi16>
242+
// return %[[RES]] : vector<4xi16>
243+
%0 = vector.bitcast %a : vector<2xi8> to vector<4xi4>
244+
%1 = arith.extui %0 : vector<4xi4> to vector<4xi16>
245+
return %1 : vector<4xi16>
246+
}
247+
248+
// CHECK-LABEL: func.func @fext_splat2(
249+
// CHECK-SAME: %[[A:[0-9a-z]*]]: vector<3xi16>) -> vector<12xi32> {
250+
func.func @fext_splat2(%a: vector<3xi16>) -> vector<12xi32> {
251+
// CHECK-DAG: %[[MASK:.*]] = arith.constant dense<[15, 240, 3840, -4096, 15, 240, 3840, -4096, 15, 240, 3840, -4096]> : vector<12xi16>
252+
// CHECK-DAG: %[[SHR_CST0:.*]] = arith.constant dense<4> : vector<12xi16>
253+
// CHECK-DAG: %[[SHR_CST1:.*]] = arith.constant dense<8> : vector<12xi16>
254+
// CHECK-DAG: %[[SHR_CST2:.*]] = arith.constant dense<12> : vector<12xi16>
255+
// CHECK: %[[V0:.*]] = vector.shuffle %[[A]], %[[A]] [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2] : vector<3xi16>, vector<3xi16>
256+
// CHECK: %[[A0:.*]] = arith.andi %[[V0]], %[[MASK]] : vector<12xi16>
257+
// CHECK: %[[SHR0:.*]] = arith.shrui %[[A0]], %[[SHR_CST0]] : vector<12xi16>
258+
// CHECK: %[[V1:.*]] = vector.shuffle %[[A0]], %[[SHR0]] [0, 13, 4, 17, 8, 21] : vector<12xi16>, vector<12xi16>
259+
// CHECK: %[[SHR1:.*]] = arith.shrui %[[A0]], %[[SHR_CST1]] : vector<12xi16>
260+
// CHECK: %[[V2:.*]] = vector.shuffle %[[V1]], %[[SHR1]] [0, 1, 8, 2, 3, 12, 4, 5, 16] : vector<6xi16>, vector<12xi16>
261+
// CHECK: %[[SHR2:.*]] = arith.shrui %[[A0]], %[[SHR_CST2]] : vector<12xi16>
262+
// CHECK: %[[V3:.*]] = vector.shuffle %[[V2]], %[[SHR2]] [0, 1, 2, 12, 3, 4, 5, 16, 6, 7, 8, 20] : vector<9xi16>, vector<12xi16>
263+
// CHECK: %[[RES:.*]] = arith.extui %[[V3]] : vector<12xi16> to vector<12xi32>
264+
// CHECK: return %[[RES]] : vector<12xi32>
265+
%0 = vector.bitcast %a : vector<3xi16> to vector<12xi4>
266+
%1 = arith.extui %0 : vector<12xi4> to vector<12xi32>
267+
return %1 : vector<12xi32>
268+
}
269+
196270
// CHECK-LABEL: func.func @aligned_extsi(
197271
func.func @aligned_extsi(%a: vector<8xi4>) -> vector<8xi32> {
198272
// CHECK-SAME: %[[IN:.*]]: vector<8xi4>) -> vector<8xi32> {

mlir/test/Integration/Dialect/Vector/CPU/test-rewrite-narrow-types.mlir

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,36 @@ func.func @f3(%v: vector<2xi48>) {
124124
return
125125
}
126126

127+
func.func @print_as_i1_2xi8(%v : vector<2xi8>) {
128+
%bitsi16 = vector.bitcast %v : vector<2xi8> to vector<16xi1>
129+
vector.print %bitsi16 : vector<16xi1>
130+
return
131+
}
132+
133+
func.func @print_as_i1_4xi4(%v : vector<4xi4>) {
134+
%bitsi16 = vector.bitcast %v : vector<4xi4> to vector<16xi1>
135+
vector.print %bitsi16 : vector<16xi1>
136+
return
137+
}
138+
139+
func.func @ftrunc_splat(%v: vector<2xi24>) {
140+
%trunc = arith.trunci %v : vector<2xi24> to vector<2xi8>
141+
func.call @print_as_i1_2xi8(%trunc) : (vector<2xi8>) -> ()
142+
// CHECK: (
143+
// CHECK-SAME: 0, 1, 1, 1, 1, 1, 1, 1,
144+
// CHECK-SAME: 1, 1, 0, 0, 0, 0, 1, 1 )
145+
146+
%bitcast = vector.bitcast %trunc : vector<2xi8> to vector<4xi4>
147+
func.call @print_as_i1_4xi4(%bitcast) : (vector<4xi4>) -> ()
148+
// CHECK: (
149+
// CHECK-SAME: 0, 1, 1, 1,
150+
// CHECK-SAME: 1, 1, 1, 1,
151+
// CHECK-SAME: 1, 1, 0, 0,
152+
// CHECK-SAME: 0, 0, 1, 1 )
153+
154+
return
155+
}
156+
127157
func.func @print_as_i1_8xi5(%v : vector<8xi5>) {
128158
%bitsi40 = vector.bitcast %v : vector<8xi5> to vector<40xi1>
129159
vector.print %bitsi40 : vector<40xi1>
@@ -164,6 +194,32 @@ func.func @fext(%a: vector<5xi8>) {
164194
return
165195
}
166196

197+
func.func @print_as_i1_4xi8(%v : vector<4xi8>) {
198+
%bitsi32 = vector.bitcast %v : vector<4xi8> to vector<32xi1>
199+
vector.print %bitsi32 : vector<32xi1>
200+
return
201+
}
202+
203+
func.func @fext_splat(%a: vector<2xi8>) {
204+
%0 = vector.bitcast %a : vector<2xi8> to vector<4xi4>
205+
func.call @print_as_i1_4xi4(%0) : (vector<4xi4>) -> ()
206+
// CHECK: (
207+
// CHECK-SAME: 0, 1, 1, 1,
208+
// CHECK-SAME: 1, 1, 1, 1,
209+
// CHECK-SAME: 1, 1, 0, 0,
210+
// CHECK-SAME: 0, 0, 1, 1 )
211+
212+
%1 = arith.extui %0 : vector<4xi4> to vector<4xi8>
213+
func.call @print_as_i1_4xi8(%1) : (vector<4xi8>) -> ()
214+
// CHECK: (
215+
// CHECK-SAME: 0, 1, 1, 1, 0, 0, 0, 0,
216+
// CHECK-SAME: 1, 1, 1, 1, 0, 0, 0, 0,
217+
// CHECK-SAME: 1, 1, 0, 0, 0, 0, 0, 0,
218+
// CHECK-SAME: 0, 0, 1, 1, 0, 0, 0, 0 )
219+
220+
return
221+
}
222+
167223
func.func @fcst_maskedload(%A: memref<?xi4>, %passthru: vector<6xi4>) -> vector<6xi4> {
168224
%c0 = arith.constant 0: index
169225
%mask = vector.constant_mask [3] : vector<6xi1>
@@ -190,9 +246,19 @@ func.func @entry() {
190246
func.call @f3(%v3) : (vector<2xi48>) -> ()
191247

192248
%v4 = arith.constant dense<[
249+
0xafe, 0xbc3
250+
]> : vector<2xi24>
251+
func.call @ftrunc_splat(%v4) : (vector<2xi24>) -> ()
252+
253+
%v5 = arith.constant dense<[
193254
0xef, 0xee, 0xed, 0xec, 0xeb
194255
]> : vector<5xi8>
195-
func.call @fext(%v4) : (vector<5xi8>) -> ()
256+
func.call @fext(%v5) : (vector<5xi8>) -> ()
257+
258+
%v6 = arith.constant dense<[
259+
0xfe, 0xc3
260+
]> : vector<2xi8>
261+
func.call @fext_splat(%v6) : (vector<2xi8>) -> ()
196262

197263
// Set up memory.
198264
%c0 = arith.constant 0: index

0 commit comments

Comments
 (0)