Skip to content

Commit 9c9e621

Browse files
Improve performance of vector broadcast in SVE2
Modify the codegen of vector broadcast in SVE2 to emit the ARM TBL intrinsic instead of llvm.vector.insert. This fixes the performance test failure of nested_vectorization_gemm.
1 parent a7bc84b commit 9c9e621

File tree

1 file changed

+0
-13
lines changed

1 file changed

+0
-13
lines changed

src/CodeGen_ARM.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2067,19 +2067,6 @@ void CodeGen_ARM::visit(const Shuffle *op) {
20672067
value = insert_scalable_vector(padding, val_0, 0);
20682068
return;
20692069
}
2070-
} else if (op->is_broadcast()) {
2071-
// Undo simplification to avoid arbitrary-indexed shuffle
2072-
Expr equiv;
2073-
for (int f = 0; f < op->broadcast_factor(); ++f) {
2074-
if (equiv.defined()) {
2075-
equiv = Shuffle::make_concat({equiv, op->vectors[0]});
2076-
} else {
2077-
equiv = op->vectors[0];
2078-
}
2079-
}
2080-
equiv = common_subexpression_elimination(equiv);
2081-
value = codegen(equiv);
2082-
return;
20832070
}
20842071

20852072
CodeGen_Posix::visit(op);

0 commit comments

Comments
 (0)