-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[SLP][AArch64][NFC] Add more tests for SLP vectorization of div #113876
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Currently, we don't have many tests that show the SLP outcome for integer divisions. This patch adds tests for the same. In certain scenarios, for Neon, vectorization is profitable. An attempt will be made in the future to improve the cost model for the same.
|
@llvm/pr-subscribers-llvm-transforms Author: Sushant Gokhale (sushgokh) Changes: Currently, we don't have many tests that show the SLP outcome for integer divisions. This patch adds tests for the same. In certain scenarios, for Neon, vectorization is profitable. An attempt will be made in the future to improve the cost model for the same. Patch is 27.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113876.diff 1 File Affected:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
new file mode 100644
index 00000000000000..e972955e26cb47
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=aarch64 -passes=slp-vectorizer -mattr=-sve -S < %s | FileCheck %s --check-prefixes=CHECK,NO-SVE
+; RUN: opt -mtriple=aarch64 -passes=slp-vectorizer -mattr=+sve -S < %s | FileCheck %s --check-prefixes=CHECK,SVE
+
+; Element-wise signed division of two <2 x i8> vectors, written as scalar
+; extractelement/sdiv/insertelement so the SLP vectorizer has a build-vector
+; to work on. Both operands are plain function arguments (neither is a
+; constant). Per the assertions below, the -mattr=-sve run leaves the scalar
+; sdivs in place, while the -mattr=+sve run emits a single <2 x i8> sdiv.
+define <2 x i8> @slp_v2i8_Op1_Op2_unknown(<2 x i8> %a, <2 x i8> %b)
+; NO-SVE-LABEL: define <2 x i8> @slp_v2i8_Op1_Op2_unknown(
+; NO-SVE-SAME: <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; NO-SVE-NEXT: [[A0:%.*]] = extractelement <2 x i8> [[A]], i32 0
+; NO-SVE-NEXT: [[A1:%.*]] = extractelement <2 x i8> [[A]], i32 1
+; NO-SVE-NEXT: [[B0:%.*]] = extractelement <2 x i8> [[B]], i32 0
+; NO-SVE-NEXT: [[B1:%.*]] = extractelement <2 x i8> [[B]], i32 1
+; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i8 [[A0]], [[B0]]
+; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i8 [[A1]], [[B1]]
+; NO-SVE-NEXT: [[R0:%.*]] = insertelement <2 x i8> poison, i8 [[TMP1]], i32 0
+; NO-SVE-NEXT: [[R1:%.*]] = insertelement <2 x i8> [[R0]], i8 [[TMP2]], i32 1
+; NO-SVE-NEXT: ret <2 x i8> [[R1]]
+;
+; SVE-LABEL: define <2 x i8> @slp_v2i8_Op1_Op2_unknown(
+; SVE-SAME: <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; SVE-NEXT: [[TMP1:%.*]] = sdiv <2 x i8> [[A]], [[B]]
+; SVE-NEXT: ret <2 x i8> [[TMP1]]
+;
+{
+ %a0 = extractelement <2 x i8> %a, i32 0
+ %a1 = extractelement <2 x i8> %a, i32 1
+ %b0 = extractelement <2 x i8> %b, i32 0
+ %b1 = extractelement <2 x i8> %b, i32 1
+ %1 = sdiv i8 %a0, %b0
+ %2 = sdiv i8 %a1, %b1
+ %r0 = insertelement <2 x i8> poison, i8 %1, i32 0
+ %r1 = insertelement <2 x i8> %r0, i8 %2, i32 1
+ ret <2 x i8> %r1
+}
+
+; Element-wise signed division of two <2 x i16> vectors, written as scalar
+; extractelement/sdiv/insertelement. Both operands are plain function
+; arguments. Per the assertions below, the -mattr=-sve run keeps the two
+; scalar sdivs, while the -mattr=+sve run emits a single <2 x i16> sdiv.
+define <2 x i16> @slp_v2i16_Op1_Op2_unknown(<2 x i16> %a, <2 x i16> %b)
+; NO-SVE-LABEL: define <2 x i16> @slp_v2i16_Op1_Op2_unknown(
+; NO-SVE-SAME: <2 x i16> [[A:%.*]], <2 x i16> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT: [[A0:%.*]] = extractelement <2 x i16> [[A]], i32 0
+; NO-SVE-NEXT: [[A1:%.*]] = extractelement <2 x i16> [[A]], i32 1
+; NO-SVE-NEXT: [[B0:%.*]] = extractelement <2 x i16> [[B]], i32 0
+; NO-SVE-NEXT: [[B1:%.*]] = extractelement <2 x i16> [[B]], i32 1
+; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i16 [[A0]], [[B0]]
+; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i16 [[A1]], [[B1]]
+; NO-SVE-NEXT: [[R0:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i32 0
+; NO-SVE-NEXT: [[R1:%.*]] = insertelement <2 x i16> [[R0]], i16 [[TMP2]], i32 1
+; NO-SVE-NEXT: ret <2 x i16> [[R1]]
+;
+; SVE-LABEL: define <2 x i16> @slp_v2i16_Op1_Op2_unknown(
+; SVE-SAME: <2 x i16> [[A:%.*]], <2 x i16> [[B:%.*]]) #[[ATTR0]] {
+; SVE-NEXT: [[TMP1:%.*]] = sdiv <2 x i16> [[A]], [[B]]
+; SVE-NEXT: ret <2 x i16> [[TMP1]]
+;
+{
+ %a0 = extractelement <2 x i16> %a, i32 0
+ %a1 = extractelement <2 x i16> %a, i32 1
+ %b0 = extractelement <2 x i16> %b, i32 0
+ %b1 = extractelement <2 x i16> %b, i32 1
+ %1 = sdiv i16 %a0, %b0
+ %2 = sdiv i16 %a1, %b1
+ %r0 = insertelement <2 x i16> poison, i16 %1, i32 0
+ %r1 = insertelement <2 x i16> %r0, i16 %2, i32 1
+ ret <2 x i16> %r1
+}
+
+; Element-wise signed division of two <2 x i32> vectors, written as scalar
+; extractelement/sdiv/insertelement. Both operands are plain function
+; arguments. Per the assertions below, the -mattr=-sve run keeps the two
+; scalar sdivs, while the -mattr=+sve run emits a single <2 x i32> sdiv.
+define <2 x i32> @slp_v2i32_Op1_Op2_unknown(<2 x i32> %a, <2 x i32> %b)
+; NO-SVE-LABEL: define <2 x i32> @slp_v2i32_Op1_Op2_unknown(
+; NO-SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT: [[A0:%.*]] = extractelement <2 x i32> [[A]], i32 0
+; NO-SVE-NEXT: [[A1:%.*]] = extractelement <2 x i32> [[A]], i32 1
+; NO-SVE-NEXT: [[B0:%.*]] = extractelement <2 x i32> [[B]], i32 0
+; NO-SVE-NEXT: [[B1:%.*]] = extractelement <2 x i32> [[B]], i32 1
+; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i32 [[A0]], [[B0]]
+; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i32 [[A1]], [[B1]]
+; NO-SVE-NEXT: [[R0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i32 0
+; NO-SVE-NEXT: [[R1:%.*]] = insertelement <2 x i32> [[R0]], i32 [[TMP2]], i32 1
+; NO-SVE-NEXT: ret <2 x i32> [[R1]]
+;
+; SVE-LABEL: define <2 x i32> @slp_v2i32_Op1_Op2_unknown(
+; SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) #[[ATTR0]] {
+; SVE-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[A]], [[B]]
+; SVE-NEXT: ret <2 x i32> [[TMP1]]
+;
+{
+ %a0 = extractelement <2 x i32> %a, i32 0
+ %a1 = extractelement <2 x i32> %a, i32 1
+ %b0 = extractelement <2 x i32> %b, i32 0
+ %b1 = extractelement <2 x i32> %b, i32 1
+ %1 = sdiv i32 %a0, %b0
+ %2 = sdiv i32 %a1, %b1
+ %r0 = insertelement <2 x i32> poison, i32 %1, i32 0
+ %r1 = insertelement <2 x i32> %r0, i32 %2, i32 1
+ ret <2 x i32> %r1
+}
+
+; Element-wise signed division of two <2 x i64> vectors, written as scalar
+; extractelement/sdiv/insertelement. Both operands are plain function
+; arguments. Per the assertions below, the -mattr=-sve run keeps the two
+; scalar sdivs, while the -mattr=+sve run emits a single <2 x i64> sdiv.
+define <2 x i64> @slp_v2i64_Op1_Op2_unknown(<2 x i64> %a, <2 x i64> %b)
+; NO-SVE-LABEL: define <2 x i64> @slp_v2i64_Op1_Op2_unknown(
+; NO-SVE-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT: [[A0:%.*]] = extractelement <2 x i64> [[A]], i32 0
+; NO-SVE-NEXT: [[A1:%.*]] = extractelement <2 x i64> [[A]], i32 1
+; NO-SVE-NEXT: [[B0:%.*]] = extractelement <2 x i64> [[B]], i32 0
+; NO-SVE-NEXT: [[B1:%.*]] = extractelement <2 x i64> [[B]], i32 1
+; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i64 [[A0]], [[B0]]
+; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i64 [[A1]], [[B1]]
+; NO-SVE-NEXT: [[R0:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i32 0
+; NO-SVE-NEXT: [[R1:%.*]] = insertelement <2 x i64> [[R0]], i64 [[TMP2]], i32 1
+; NO-SVE-NEXT: ret <2 x i64> [[R1]]
+;
+; SVE-LABEL: define <2 x i64> @slp_v2i64_Op1_Op2_unknown(
+; SVE-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR0]] {
+; SVE-NEXT: [[TMP1:%.*]] = sdiv <2 x i64> [[A]], [[B]]
+; SVE-NEXT: ret <2 x i64> [[TMP1]]
+;
+{
+ %a0 = extractelement <2 x i64> %a, i32 0
+ %a1 = extractelement <2 x i64> %a, i32 1
+ %b0 = extractelement <2 x i64> %b, i32 0
+ %b1 = extractelement <2 x i64> %b, i32 1
+ %1 = sdiv i64 %a0, %b0
+ %2 = sdiv i64 %a1, %b1
+ %r0 = insertelement <2 x i64> poison, i64 %1, i32 0
+ %r1 = insertelement <2 x i64> %r0, i64 %2, i32 1
+ ret <2 x i64> %r1
+}
+
+; Element-wise signed division of two <4 x i8> vectors, written as four
+; scalar extractelement/sdiv/insertelement lanes. Both operands are plain
+; function arguments. Per the assertions below, the -mattr=-sve run keeps
+; the four scalar sdivs, while the -mattr=+sve run emits a single
+; <4 x i8> sdiv.
+define <4 x i8> @slp_v4i8_Op1_Op2_unknown(<4 x i8> %a, <4 x i8> %b)
+; NO-SVE-LABEL: define <4 x i8> @slp_v4i8_Op1_Op2_unknown(
+; NO-SVE-SAME: <4 x i8> [[A:%.*]], <4 x i8> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT: [[A0:%.*]] = extractelement <4 x i8> [[A]], i32 0
+; NO-SVE-NEXT: [[A1:%.*]] = extractelement <4 x i8> [[A]], i32 1
+; NO-SVE-NEXT: [[A2:%.*]] = extractelement <4 x i8> [[A]], i32 2
+; NO-SVE-NEXT: [[A3:%.*]] = extractelement <4 x i8> [[A]], i32 3
+; NO-SVE-NEXT: [[B0:%.*]] = extractelement <4 x i8> [[B]], i32 0
+; NO-SVE-NEXT: [[B1:%.*]] = extractelement <4 x i8> [[B]], i32 1
+; NO-SVE-NEXT: [[B2:%.*]] = extractelement <4 x i8> [[B]], i32 2
+; NO-SVE-NEXT: [[B3:%.*]] = extractelement <4 x i8> [[B]], i32 3
+; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i8 [[A0]], [[B0]]
+; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i8 [[A1]], [[B1]]
+; NO-SVE-NEXT: [[TMP3:%.*]] = sdiv i8 [[A2]], [[B2]]
+; NO-SVE-NEXT: [[TMP4:%.*]] = sdiv i8 [[A3]], [[B3]]
+; NO-SVE-NEXT: [[R0:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i32 0
+; NO-SVE-NEXT: [[R1:%.*]] = insertelement <4 x i8> [[R0]], i8 [[TMP2]], i32 1
+; NO-SVE-NEXT: [[R2:%.*]] = insertelement <4 x i8> [[R1]], i8 [[TMP3]], i32 2
+; NO-SVE-NEXT: [[R3:%.*]] = insertelement <4 x i8> [[R2]], i8 [[TMP4]], i32 3
+; NO-SVE-NEXT: ret <4 x i8> [[R3]]
+;
+; SVE-LABEL: define <4 x i8> @slp_v4i8_Op1_Op2_unknown(
+; SVE-SAME: <4 x i8> [[A:%.*]], <4 x i8> [[B:%.*]]) #[[ATTR0]] {
+; SVE-NEXT: [[TMP1:%.*]] = sdiv <4 x i8> [[A]], [[B]]
+; SVE-NEXT: ret <4 x i8> [[TMP1]]
+;
+{
+ %a0 = extractelement <4 x i8> %a, i32 0
+ %a1 = extractelement <4 x i8> %a, i32 1
+ %a2 = extractelement <4 x i8> %a, i32 2
+ %a3 = extractelement <4 x i8> %a, i32 3
+ %b0 = extractelement <4 x i8> %b, i32 0
+ %b1 = extractelement <4 x i8> %b, i32 1
+ %b2 = extractelement <4 x i8> %b, i32 2
+ %b3 = extractelement <4 x i8> %b, i32 3
+ %1 = sdiv i8 %a0, %b0
+ %2 = sdiv i8 %a1, %b1
+ %3 = sdiv i8 %a2, %b2
+ %4 = sdiv i8 %a3, %b3
+ %r0 = insertelement <4 x i8> poison, i8 %1, i32 0
+ %r1 = insertelement <4 x i8> %r0, i8 %2, i32 1
+ %r2 = insertelement <4 x i8> %r1, i8 %3, i32 2
+ %r3 = insertelement <4 x i8> %r2, i8 %4, i32 3
+ ret <4 x i8> %r3
+}
+
+; Element-wise signed division of two <4 x i16> vectors, written as four
+; scalar extractelement/sdiv/insertelement lanes. Both operands are plain
+; function arguments. Per the assertions below, the -mattr=-sve run keeps
+; the four scalar sdivs, while the -mattr=+sve run emits a single
+; <4 x i16> sdiv.
+define <4 x i16> @slp_v4i16_Op1_Op2_unknown(<4 x i16> %a, <4 x i16> %b)
+; NO-SVE-LABEL: define <4 x i16> @slp_v4i16_Op1_Op2_unknown(
+; NO-SVE-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT: [[A0:%.*]] = extractelement <4 x i16> [[A]], i32 0
+; NO-SVE-NEXT: [[A1:%.*]] = extractelement <4 x i16> [[A]], i32 1
+; NO-SVE-NEXT: [[A2:%.*]] = extractelement <4 x i16> [[A]], i32 2
+; NO-SVE-NEXT: [[A3:%.*]] = extractelement <4 x i16> [[A]], i32 3
+; NO-SVE-NEXT: [[B0:%.*]] = extractelement <4 x i16> [[B]], i32 0
+; NO-SVE-NEXT: [[B1:%.*]] = extractelement <4 x i16> [[B]], i32 1
+; NO-SVE-NEXT: [[B2:%.*]] = extractelement <4 x i16> [[B]], i32 2
+; NO-SVE-NEXT: [[B3:%.*]] = extractelement <4 x i16> [[B]], i32 3
+; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i16 [[A0]], [[B0]]
+; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i16 [[A1]], [[B1]]
+; NO-SVE-NEXT: [[TMP3:%.*]] = sdiv i16 [[A2]], [[B2]]
+; NO-SVE-NEXT: [[TMP4:%.*]] = sdiv i16 [[A3]], [[B3]]
+; NO-SVE-NEXT: [[R0:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i32 0
+; NO-SVE-NEXT: [[R1:%.*]] = insertelement <4 x i16> [[R0]], i16 [[TMP2]], i32 1
+; NO-SVE-NEXT: [[R2:%.*]] = insertelement <4 x i16> [[R1]], i16 [[TMP3]], i32 2
+; NO-SVE-NEXT: [[R3:%.*]] = insertelement <4 x i16> [[R2]], i16 [[TMP4]], i32 3
+; NO-SVE-NEXT: ret <4 x i16> [[R3]]
+;
+; SVE-LABEL: define <4 x i16> @slp_v4i16_Op1_Op2_unknown(
+; SVE-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]]) #[[ATTR0]] {
+; SVE-NEXT: [[TMP1:%.*]] = sdiv <4 x i16> [[A]], [[B]]
+; SVE-NEXT: ret <4 x i16> [[TMP1]]
+;
+{
+ %a0 = extractelement <4 x i16> %a, i32 0
+ %a1 = extractelement <4 x i16> %a, i32 1
+ %a2 = extractelement <4 x i16> %a, i32 2
+ %a3 = extractelement <4 x i16> %a, i32 3
+ %b0 = extractelement <4 x i16> %b, i32 0
+ %b1 = extractelement <4 x i16> %b, i32 1
+ %b2 = extractelement <4 x i16> %b, i32 2
+ %b3 = extractelement <4 x i16> %b, i32 3
+ %1 = sdiv i16 %a0, %b0
+ %2 = sdiv i16 %a1, %b1
+ %3 = sdiv i16 %a2, %b2
+ %4 = sdiv i16 %a3, %b3
+ %r0 = insertelement <4 x i16> poison, i16 %1, i32 0
+ %r1 = insertelement <4 x i16> %r0, i16 %2, i32 1
+ %r2 = insertelement <4 x i16> %r1, i16 %3, i32 2
+ %r3 = insertelement <4 x i16> %r2, i16 %4, i32 3
+ ret <4 x i16> %r3
+}
+
+; Element-wise signed division of two <4 x i32> vectors, written as four
+; scalar extractelement/sdiv/insertelement lanes. Both operands are plain
+; function arguments. Per the assertions below, the -mattr=-sve run keeps
+; the four scalar sdivs, while the -mattr=+sve run emits a single
+; <4 x i32> sdiv.
+define <4 x i32> @slp_v4i32_Op1_Op2_unknown(<4 x i32> %a, <4 x i32> %b)
+; NO-SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_Op2_unknown(
+; NO-SVE-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A]], i32 0
+; NO-SVE-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
+; NO-SVE-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
+; NO-SVE-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
+; NO-SVE-NEXT: [[B0:%.*]] = extractelement <4 x i32> [[B]], i32 0
+; NO-SVE-NEXT: [[B1:%.*]] = extractelement <4 x i32> [[B]], i32 1
+; NO-SVE-NEXT: [[B2:%.*]] = extractelement <4 x i32> [[B]], i32 2
+; NO-SVE-NEXT: [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3
+; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i32 [[A0]], [[B0]]
+; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i32 [[A1]], [[B1]]
+; NO-SVE-NEXT: [[TMP3:%.*]] = sdiv i32 [[A2]], [[B2]]
+; NO-SVE-NEXT: [[TMP4:%.*]] = sdiv i32 [[A3]], [[B3]]
+; NO-SVE-NEXT: [[R0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
+; NO-SVE-NEXT: [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP2]], i32 1
+; NO-SVE-NEXT: [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[TMP3]], i32 2
+; NO-SVE-NEXT: [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[TMP4]], i32 3
+; NO-SVE-NEXT: ret <4 x i32> [[R3]]
+;
+; SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_Op2_unknown(
+; SVE-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
+; SVE-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], [[B]]
+; SVE-NEXT: ret <4 x i32> [[TMP1]]
+;
+{
+ %a0 = extractelement <4 x i32> %a, i32 0
+ %a1 = extractelement <4 x i32> %a, i32 1
+ %a2 = extractelement <4 x i32> %a, i32 2
+ %a3 = extractelement <4 x i32> %a, i32 3
+ %b0 = extractelement <4 x i32> %b, i32 0
+ %b1 = extractelement <4 x i32> %b, i32 1
+ %b2 = extractelement <4 x i32> %b, i32 2
+ %b3 = extractelement <4 x i32> %b, i32 3
+ %1 = sdiv i32 %a0, %b0
+ %2 = sdiv i32 %a1, %b1
+ %3 = sdiv i32 %a2, %b2
+ %4 = sdiv i32 %a3, %b3
+ %r0 = insertelement <4 x i32> poison, i32 %1, i32 0
+ %r1 = insertelement <4 x i32> %r0, i32 %2, i32 1
+ %r2 = insertelement <4 x i32> %r1, i32 %3, i32 2
+ %r3 = insertelement <4 x i32> %r2, i32 %4, i32 3
+ ret <4 x i32> %r3
+}
+
+; Element-wise signed division of two <8 x i8> vectors, written as eight
+; scalar extractelement/sdiv/insertelement lanes. Both operands are plain
+; function arguments. Per the assertions below, the -mattr=-sve run keeps
+; the eight scalar sdivs, while the -mattr=+sve run emits two <4 x i8>
+; sdivs (lanes 0-3 and lanes 4-7) stitched together with shufflevectors.
+;
+; NOTE(review): the function returns %r3, not %r7, so only lanes 0-3 of the
+; result are live and %r4..%r7 have no users; the assertions mirror this
+; (the returned value is R3 in one run and the widened first half TMP7 in
+; the other). Every other complete test in this file returns its last
+; insertelement, so this looks like a typo. If so, change the return to
+; %r7 and regenerate the assertions with utils/update_test_checks.py.
+define <8 x i8> @slp_v8i8_Op1_Op2_unknown(<8 x i8> %a, <8 x i8> %b)
+; NO-SVE-LABEL: define <8 x i8> @slp_v8i8_Op1_Op2_unknown(
+; NO-SVE-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT: [[A0:%.*]] = extractelement <8 x i8> [[A]], i32 0
+; NO-SVE-NEXT: [[A1:%.*]] = extractelement <8 x i8> [[A]], i32 1
+; NO-SVE-NEXT: [[A2:%.*]] = extractelement <8 x i8> [[A]], i32 2
+; NO-SVE-NEXT: [[A3:%.*]] = extractelement <8 x i8> [[A]], i32 3
+; NO-SVE-NEXT: [[A4:%.*]] = extractelement <8 x i8> [[A]], i32 4
+; NO-SVE-NEXT: [[A5:%.*]] = extractelement <8 x i8> [[A]], i32 5
+; NO-SVE-NEXT: [[A6:%.*]] = extractelement <8 x i8> [[A]], i32 6
+; NO-SVE-NEXT: [[A7:%.*]] = extractelement <8 x i8> [[A]], i32 7
+; NO-SVE-NEXT: [[B0:%.*]] = extractelement <8 x i8> [[B]], i32 0
+; NO-SVE-NEXT: [[B1:%.*]] = extractelement <8 x i8> [[B]], i32 1
+; NO-SVE-NEXT: [[B2:%.*]] = extractelement <8 x i8> [[B]], i32 2
+; NO-SVE-NEXT: [[B3:%.*]] = extractelement <8 x i8> [[B]], i32 3
+; NO-SVE-NEXT: [[B4:%.*]] = extractelement <8 x i8> [[B]], i32 4
+; NO-SVE-NEXT: [[B5:%.*]] = extractelement <8 x i8> [[B]], i32 5
+; NO-SVE-NEXT: [[B6:%.*]] = extractelement <8 x i8> [[B]], i32 6
+; NO-SVE-NEXT: [[B7:%.*]] = extractelement <8 x i8> [[B]], i32 7
+; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i8 [[A0]], [[B0]]
+; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i8 [[A1]], [[B1]]
+; NO-SVE-NEXT: [[TMP3:%.*]] = sdiv i8 [[A2]], [[B2]]
+; NO-SVE-NEXT: [[TMP4:%.*]] = sdiv i8 [[A3]], [[B3]]
+; NO-SVE-NEXT: [[TMP5:%.*]] = sdiv i8 [[A4]], [[B4]]
+; NO-SVE-NEXT: [[TMP6:%.*]] = sdiv i8 [[A5]], [[B5]]
+; NO-SVE-NEXT: [[TMP7:%.*]] = sdiv i8 [[A6]], [[B6]]
+; NO-SVE-NEXT: [[TMP8:%.*]] = sdiv i8 [[A7]], [[B7]]
+; NO-SVE-NEXT: [[R0:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i32 0
+; NO-SVE-NEXT: [[R1:%.*]] = insertelement <8 x i8> [[R0]], i8 [[TMP2]], i32 1
+; NO-SVE-NEXT: [[R2:%.*]] = insertelement <8 x i8> [[R1]], i8 [[TMP3]], i32 2
+; NO-SVE-NEXT: [[R3:%.*]] = insertelement <8 x i8> [[R2]], i8 [[TMP4]], i32 3
+; NO-SVE-NEXT: [[R4:%.*]] = insertelement <8 x i8> [[R3]], i8 [[TMP5]], i32 4
+; NO-SVE-NEXT: [[R5:%.*]] = insertelement <8 x i8> [[R4]], i8 [[TMP6]], i32 5
+; NO-SVE-NEXT: [[R6:%.*]] = insertelement <8 x i8> [[R5]], i8 [[TMP7]], i32 6
+; NO-SVE-NEXT: [[R7:%.*]] = insertelement <8 x i8> [[R6]], i8 [[TMP8]], i32 7
+; NO-SVE-NEXT: ret <8 x i8> [[R3]]
+;
+; SVE-LABEL: define <8 x i8> @slp_v8i8_Op1_Op2_unknown(
+; SVE-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] {
+; SVE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SVE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SVE-NEXT: [[TMP3:%.*]] = sdiv <4 x i8> [[TMP1]], [[TMP2]]
+; SVE-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SVE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SVE-NEXT: [[TMP6:%.*]] = sdiv <4 x i8> [[TMP4]], [[TMP5]]
+; SVE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; SVE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; SVE-NEXT: [[R71:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; SVE-NEXT: ret <8 x i8> [[TMP7]]
+;
+{
+ %a0 = extractelement <8 x i8> %a, i32 0
+ %a1 = extractelement <8 x i8> %a, i32 1
+ %a2 = extractelement <8 x i8> %a, i32 2
+ %a3 = extractelement <8 x i8> %a, i32 3
+ %a4 = extractelement <8 x i8> %a, i32 4
+ %a5 = extractelement <8 x i8> %a, i32 5
+ %a6 = extractelement <8 x i8> %a, i32 6
+ %a7 = extractelement <8 x i8> %a, i32 7
+ %b0 = extractelement <8 x i8> %b, i32 0
+ %b1 = extractelement <8 x i8> %b, i32 1
+ %b2 = extractelement <8 x i8> %b, i32 2
+ %b3 = extractelement <8 x i8> %b, i32 3
+ %b4 = extractelement <8 x i8> %b, i32 4
+ %b5 = extractelement <8 x i8> %b, i32 5
+ %b6 = extractelement <8 x i8> %b, i32 6
+ %b7 = extractelement <8 x i8> %b, i32 7
+ %1 = sdiv i8 %a0, %b0
+ %2 = sdiv i8 %a1, %b1
+ %3 = sdiv i8 %a2, %b2
+ %4 = sdiv i8 %a3, %b3
+ %5 = sdiv i8 %a4, %b4
+ %6 = sdiv i8 %a5, %b5
+ %7 = sdiv i8 %a6, %b6
+ %8 = sdiv i8 %a7, %b7
+ %r0 = insertelement <8 x i8> poison, i8 %1, i32 0
+ %r1 = insertelement <8 x i8> %r0, i8 %2, i32 1
+ %r2 = insertelement <8 x i8> %r1, i8 %3, i32 2
+ %r3 = insertelement <8 x i8> %r2, i8 %4, i32 3
+ %r4 = insertelement <8 x i8> %r3, i8 %5, i32 4
+ %r5 = insertelement <8 x i8> %r4, i8 %6, i32 5
+ %r6 = insertelement <8 x i8> %r5, i8 %7, i32 6
+ %r7 = insertelement <8 x i8> %r6, i8 %8, i32 7
+ ret <8 x i8> %r3
+}
+
+define <8 x i16> @slp_v8i16_Op1_Op2_unknown(<8 x i16> %a, <8 x i16> %b)
+; NO-SVE-LABEL: define <8 x i16> @slp_v8i16_Op1_Op2_unknown(
+; NO-SVE-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT: [[A0:%.*]] = extractelement <8 x i16> [[A]], i32 0
+; NO-SVE-NEXT: [[A1:%.*]] = extractelement <8 x i16> [[A]], i32 1
+; NO-SVE-NEXT: [[A2:%.*]] = extractelement <8 x i16> [[A]], i32 2
+; NO-SVE-NEXT: [[A3:%.*]] = extractelement <8 x i16> [[A]], i32 3
+; NO-SVE-NEXT: [[A4:%.*]] = extractelement <8 x i16> [[A]], i32 4
+; NO-SVE-NEXT: [[A5:%.*]] = extractelement <8 x i16> [[A]], i32 5
+; NO-SVE-NEXT: [[A6:%.*]] = extractelement <8 x i16> [[A]], i32 6
+; NO-SVE-NEXT: [[A7:%.*]] = extractelement <8 x i16> [[A]], i32 7
+; NO-SVE-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B]], i32 0
+; NO-SVE-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
+; NO-SVE-NEXT: [[B2:%.*]] = extractelement <8 x i16> [[B]], i32 2
+; NO-SVE-NEXT: [[B3:%.*]] = extractelement <8 x i16> [[B]], i32 3
+; NO-SVE-NEXT: [[B4:%.*]] = extractelement <8 x i16> [[B]], i32 4
+; NO-SVE-NEXT: [[B5:%.*]] = extractelement <8 x i16> [[B]], i32 5
+; NO-SVE-NEXT: [[B6:%.*]] = extractelement <8 x i16> [[B]], i32 6
+; NO-SVE-NEXT: [[B7:%.*]] = extractelement <8 x i16> [[B]], i32 7
+; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i16 [[A0]], [[B0]]
+; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i16 [[A1]], [[B1]]
+; NO-SVE-NEXT: [[TMP3:%.*]] = sdiv i16 [[A2]], [[B2]]
+; NO-SVE-NEXT: [[TMP4:%.*]] = sdiv i16 [[A3]], [[B3]]
+; NO-SVE-NEXT: [[TMP5:%.*]] = sdiv i16 [[A4]], [[B4]]
+; NO-SVE-NEXT: [[TMP6:%.*]] = sdiv i16 [[A5]], [[B5]]
+; NO-SVE-NEXT: [[TMP7:%.*]] = sdiv i16 [[A6]], [[B6]]
+; NO-SVE-NEXT: [[TMP8:%.*]] = sdiv i16 [[A7]], [[B7]]
+; NO-SVE-NEXT: [[R0:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i32 0
+; NO-SVE-NEXT: [[R1:%.*]] = insertelement <8 x i16> [[R0]], i16 [[TMP2]], i32 1
+; NO-SVE-NEXT: [[R2:%.*]] = insertelement <8 x i16> [[R1]], i16 [[TMP3]], i32 2
+; NO-SVE-NEXT: [[R3:%.*]] = insertelement <8 x i16> [[R2]], i16 [[TMP4]], i32 3
+; NO-SVE-NEXT: [[R4:%.*]] = insertelement <8 x i16> [[R3]], i16 [[TMP5]], i32 4
+; NO-SVE-NEXT: [[R5:%.*]] = insertelement <8 x i16> [[R4]], i16 [[TMP6]], i32 5
+; NO-SVE-NEXT: [[R6:%.*]] = insertelement <8 x i16> [[R5]], i16 [[TMP7]], i32 6
+; NO-SVE-NEXT: [[R7:%.*]] = insertelemen...
[truncated]
|
alexey-bataev
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
…#113876) Currently, we don't have many tests that show the SLP outcome for integer divisions. This patch adds tests for the same. In certain scenarios, for Neon, vectorization is profitable. An attempt will be made in the future to improve the cost model for the same.
Currently, we don't have many tests that show the SLP outcome for integer divisions. This patch adds tests for the same.
In certain scenarios, for Neon, vectorization is profitable. An attempt will be made in the future to improve the cost model for the same.